From f9b8f9f8c6298326397769b5d00f4b3593b3eaa2 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Mon, 4 Dec 2023 12:27:46 +0100 Subject: [PATCH 01/12] Merge pull request #1108 from AllanZyne/review/yang/urAdapterGet [UR] Fix urAdapterGet ignores NumEntries --- scripts/templates/ldrddi.cpp.mako | 3 +++ source/loader/ur_ldrddi.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index 0498ba00dc..c548b14b32 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -86,6 +86,9 @@ namespace ur_loader break; } adapterIndex++; + if (adapterIndex == NumEntries) { + break; + } } } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 649fc0ad88..9d3a0bc695 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -100,6 +100,9 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( break; } adapterIndex++; + if (adapterIndex == NumEntries) { + break; + } } } From a0bef5624287bb65807bf06a1954ed6bcf3c0cd4 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 6 Dec 2023 13:46:01 +0000 Subject: [PATCH 02/12] Merge pull request #988 from MartinWehking/adapters Enable fp16 runtime support for hip --- source/adapters/hip/device.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 5b473c050e..139906e95a 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -549,6 +549,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, SupportedExtensions += "cl_khr_fp64 "; } + SupportedExtensions += "cl_khr_fp16 "; + return ReturnValue(SupportedExtensions.c_str()); } case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { From 4798206769c5652b540275f4a10015d438d84731 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 6 Dec 2023 15:59:11 +0000 Subject: [PATCH 03/12] Merge pull request #1071 from 
igchor/tracking [UR] Enable UMF tracking by default --- CMakeLists.txt | 2 +- test/conformance/usm/usm_adapter_cuda.match | 40 +------------------ .../usm/usm_adapter_level_zero.match | 25 ------------ 3 files changed, 2 insertions(+), 65 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b243f206c..32fdae84f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ option(UR_USE_MSAN "enable MemorySanitizer" OFF) option(UR_USE_TSAN "enable ThreadSanitizer" OFF) option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF) option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) -option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" OFF) +option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" ON) option(UR_BUILD_ADAPTER_L0 "build level 0 adapter from SYCL" OFF) option(UR_BUILD_ADAPTER_OPENCL "build opencl adapter from SYCL" OFF) option(UR_BUILD_ADAPTER_CUDA "build cuda adapter from SYCL" OFF) diff --git a/test/conformance/usm/usm_adapter_cuda.match b/test/conformance/usm/usm_adapter_cuda.match index e2ba6b6f63..15b68f5c6c 100644 --- a/test/conformance/usm/usm_adapter_cuda.match +++ b/test/conformance/usm/usm_adapter_cuda.match @@ -1,45 +1,7 @@ -urUSMDeviceAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullPtrResult/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled 
-{{OPT}}urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_BASE_PTR -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_SIZE -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_POOL -{{OPT}}urUSMHostAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.InvalidNullPtrMem/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMPoolCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urUSMPoolCreateTest.SuccessWithFlag/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullPointerPoolDesc/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullPointerPool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidEnumerationFlags/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_POOL_INFO_CONTEXT -{{OPT}}urUSMPoolGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -{{OPT}}urUSMPoolGetInfoTest.InvalidNullHandlePool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidEnumerationProperty/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidSizeZero/NVIDIA_CUDA_BACKEND___{{.*}}_ 
-{{OPT}}urUSMPoolGetInfoTest.InvalidSizeTooSmall/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidNullPointerPropValue/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolDestroyTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolDestroyTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolRetainTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolRetainTest.InvalidNullHandlePool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMSharedAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.SuccessWithMultipleAdvices/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullPtrMem/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled diff --git a/test/conformance/usm/usm_adapter_level_zero.match b/test/conformance/usm/usm_adapter_level_zero.match index 9e275d805e..bf45b83ec2 100644 --- a/test/conformance/usm/usm_adapter_level_zero.match +++ b/test/conformance/usm/usm_adapter_level_zero.match @@ -1,36 +1,11 @@ -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled 
-urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMFreeTest.SuccessDeviceAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMFreeTest.SuccessHostAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMFreeTest.SuccessSharedAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_TYPE -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_BASE_PTR -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_SIZE -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_DEVICE urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_POOL -urUSMGetMemAllocInfoTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidNullPointerMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidValuePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled 
-urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_CONTEXT urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urUSMPoolRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled From 1584019e03fe0b161c6dbfa7224361c07ded067c Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 6 Dec 2023 17:07:30 +0000 Subject: [PATCH 04/12] Merge pull request #916 
from nrspruit/memFreeCheckGlobal [UR][L0] Check Global Mem Size as Limit for Free Memory --- source/adapters/level_zero/device.cpp | 35 ++++++++++++++++++--------- source/adapters/level_zero/device.hpp | 5 ++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index f5b00d80cc..ec6a294c21 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -88,6 +88,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( return UR_RESULT_SUCCESS; } +uint64_t calculateGlobalMemSize(ur_device_handle_t Device) { + // Cache GlobalMemSize + Device->ZeGlobalMemSize.Compute = + [Device](struct ze_global_memsize &GlobalMemSize) { + for (const auto &ZeDeviceMemoryExtProperty : + Device->ZeDeviceMemoryProperties->second) { + GlobalMemSize.value += ZeDeviceMemoryExtProperty.physicalSize; + } + if (GlobalMemSize.value == 0) { + for (const auto &ZeDeviceMemoryProperty : + Device->ZeDeviceMemoryProperties->first) { + GlobalMemSize.value += ZeDeviceMemoryProperty.totalSize; + } + } + }; + return Device->ZeGlobalMemSize.operator->()->value; +} + UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( ur_device_handle_t Device, ///< [in] handle of the device instance ur_device_info_t ParamName, ///< [in] type of the info to retrieve @@ -251,20 +269,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { - uint64_t GlobalMemSize = 0; // Support to read physicalSize depends on kernel, // so fallback into reading totalSize if physicalSize // is not available. 
- for (const auto &ZeDeviceMemoryExtProperty : - Device->ZeDeviceMemoryProperties->second) { - GlobalMemSize += ZeDeviceMemoryExtProperty.physicalSize; - } - if (GlobalMemSize == 0) { - for (const auto &ZeDeviceMemoryProperty : - Device->ZeDeviceMemoryProperties->first) { - GlobalMemSize += ZeDeviceMemoryProperty.totalSize; - } - } + uint64_t GlobalMemSize = calculateGlobalMemSize(Device); return ReturnValue(uint64_t{GlobalMemSize}); } case UR_DEVICE_INFO_LOCAL_MEM_SIZE: @@ -637,6 +645,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( static_cast(ZE_RESULT_ERROR_UNINITIALIZED)); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } + // Calculate the global memory size as the max limit that can be reported as + // "free" memory for the user to allocate. + uint64_t GlobalMemSize = calculateGlobalMemSize(Device); // Only report device memory which zeMemAllocDevice can allocate from. // Currently this is only the one enumerated with ordinal 0. uint64_t FreeMemory = 0; @@ -661,7 +672,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } } } - return ReturnValue(FreeMemory); + return ReturnValue(std::min(GlobalMemSize, FreeMemory)); } case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { // If there are not any memory modules then return 0. 
diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 35404c6525..bdae64beba 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -39,6 +39,10 @@ enum EventsScope { LastCommandInBatchHostVisible }; +struct ze_global_memsize { + uint64_t value; +}; + struct ur_device_handle_t_ : _ur_object { ur_device_handle_t_(ze_device_handle_t Device, ur_platform_handle_t Plt, ur_device_handle_t ParentDevice = nullptr) @@ -170,4 +174,5 @@ struct ur_device_handle_t_ : _ur_object { ZeDeviceMemoryAccessProperties; ZeCache> ZeDeviceCacheProperties; ZeCache> ZeDeviceIpVersionExt; + ZeCache ZeGlobalMemSize; }; From 0b95702d8b95b456113e1a5502d4f543217c54f1 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 6 Dec 2023 17:13:51 +0000 Subject: [PATCH 05/12] Merge pull request #1099 from jandres742/largeallocations [UR][L0] Unify use of large allocation in L0 adapter --- source/adapters/level_zero/device.cpp | 24 +++++++++++++++++++++- source/adapters/level_zero/device.hpp | 16 +++++++++++++++ source/adapters/level_zero/program.cpp | 28 ++++++++++++++++++++++++-- source/adapters/level_zero/usm.cpp | 8 +++++--- 4 files changed, 70 insertions(+), 6 deletions(-) diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index ec6a294c21..acc7c755f4 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -267,7 +267,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(uint32_t{64}); } case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: - return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); + // if not optimized for 32-bit access, return total memory size. + // otherwise, return only maximum allocatable size. 
+ if (Device->useOptimized32bitAccess() == 0) { + return ReturnValue(uint64_t{calculateGlobalMemSize(Device)}); + } else { + return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); + } case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { // Support to read physicalSize depends on kernel, // so fallback into reading totalSize if physicalSize @@ -911,6 +917,22 @@ ur_device_handle_t_::useImmediateCommandLists() { } } +int32_t ur_device_handle_t_::useOptimized32bitAccess() { + static const int32_t Optimize32bitAccessMode = [this] { + // If device is Intel(R) Data Center GPU Max, + // use default provided by L0 driver. + // TODO: Use IP versioning to select based on range of devices + if (this->isPVC()) + return -1; + const char *UrRet = std::getenv("UR_L0_USE_OPTIMIZED_32BIT_ACCESS"); + if (!UrRet) + return 0; + return std::atoi(UrRet); + }(); + + return Optimize32bitAccessMode; +} + ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { // Maintain various device properties cache. diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index bdae64beba..5f34efab44 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -145,6 +145,22 @@ struct ur_device_handle_t_ : _ur_object { // Returns whether immediate command lists are used on this device. ImmCmdlistMode ImmCommandListUsed{}; + // Returns whether large allocations are being used + // or not to have a consistent behavior throughout + // the adapter between the creation of large allocations + // and the compilation of kernels into stateful and + // stateless modes. + // With stateful mode, kernels are compiled with + // pointer-arithmetic optimizations for optimized + // access of allocations smaller than 4GB. + // In stateless mode, such optimizations are not + // applied. 
+ // Even if a GPU supports both modes, L0 driver may + // provide support for only one, like for Intel(R) + // Data Center GPU Max, for which L0 driver only + // supports stateless. + int32_t useOptimized32bitAccess(); + bool isSubDevice() { return RootDevice != nullptr; } // Is this a Data Center GPU Max series (aka PVC)? diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 92a3c87aea..f118a5b9dd 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -148,9 +148,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( ZeModuleDesc.format = (hProgram->State == ur_program_handle_t_::IL) ? ZE_MODULE_FORMAT_IL_SPIRV : ZE_MODULE_FORMAT_NATIVE; + ZeModuleDesc.inputSize = hProgram->CodeLength; ZeModuleDesc.pInputModule = hProgram->Code.get(); - ZeModuleDesc.pBuildFlags = pOptions; + + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. + std::string ZeBuildOptions{}; + if (pOptions) { + ZeBuildOptions += pOptions; + } + + if (phDevices[0]->useOptimized32bitAccess() == 0) { + ZeBuildOptions += " -ze-opt-greater-than-4GB-buffer-required"; + } + + ZeModuleDesc.pBuildFlags = ZeBuildOptions.c_str(); ZeModuleDesc.pConstants = Shim.ze(); ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; @@ -234,8 +249,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( // This produces better code because the driver can do cross-module // optimizations. Therefore, we just remember the compilation flags, so we // can use them later. - if (Options) + if (Options) { Program->BuildFlags = Options; + + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. 
+ if (Context->Devices[0]->useOptimized32bitAccess() == 0) { + Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required"; + } + } Program->State = ur_program_handle_t_::Object; return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index daec0408fb..c6d98855e7 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -178,9 +178,11 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, ZeDesc.flags = 0; ZeDesc.ordinal = 0; - ZeStruct RelaxedDesc; - if (Size > Device->ZeDeviceProperties->maxMemAllocSize) { - // Tell Level-Zero to accept Size > maxMemAllocSize + if (Device->useOptimized32bitAccess() == 0 && + (Size > Device->ZeDeviceProperties->maxMemAllocSize)) { + // Tell Level-Zero to accept Size > maxMemAllocSize if + // large allocations are used. + ZeStruct RelaxedDesc; RelaxedDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; ZeDesc.pNext = &RelaxedDesc; } From a34764a8ea90741aae885cff8ac66aa4dd1daed5 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Fri, 8 Dec 2023 12:18:51 +0000 Subject: [PATCH 06/12] Merge pull request #962 from jandres742/fixwaitbarrierwithevent [UR][L0] Correctly wait on barrier on urEnqueueEventsWaitWithBarrier --- source/adapters/level_zero/event.cpp | 12 +++++++++--- source/adapters/level_zero/queue.hpp | 5 +++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index b979c8ab15..d8af1e674d 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -165,10 +165,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( // event signal because it is already guaranteed that previous commands // in this queue are completed when the signal is started. // + // Only consideration here is that when profiling is used, signalEvent + // cannot be used if EventWaitList.Length == 0.
In those cases, we need + // to fallback directly to barrier to have correct timestamps. See here: + // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t + // // TODO: this and other special handling of in-order queues to be // updated when/if Level Zero adds native support for in-order queues. // - if (Queue->isInOrderQueue() && InOrderBarrierBySignal) { + if (Queue->isInOrderQueue() && InOrderBarrierBySignal && + !Queue->isProfilingEnabled()) { if (EventWaitList.Length) { ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (CmdList->first, EventWaitList.Length, @@ -181,6 +187,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( (CmdList->first, Event->ZeEvent, EventWaitList.Length, EventWaitList.ZeEventList)); } + return UR_RESULT_SUCCESS; }; @@ -964,8 +971,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool HostVisible, ur_event_handle_t *RetEvent) { - bool ProfilingEnabled = - !Queue || (Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0; + bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled(); if (auto CachedEvent = Context->getEventFromContextCache(HostVisible, ProfilingEnabled)) { diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 9c90a999b3..306cec5416 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -515,6 +515,11 @@ struct ur_queue_handle_t_ : _ur_object { // lists in the queue. 
ur_result_t insertStartBarrierIfDiscardEventsMode(ur_command_list_ptr_t &CmdList); + + // returns true if queue has profiling enabled + bool isProfilingEnabled() { + return ((this->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0); + } }; // This helper function creates a ur_event_handle_t and associate a From eb67d69bc213db7a0781a549dedb5d04e2e0af9d Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Mon, 11 Dec 2023 12:27:40 +0000 Subject: [PATCH 07/12] Merge pull request #1067 from alexbatashev/fix_native_handles [UR][Loader] Fix handling of native handles --- scripts/templates/ldrddi.cpp.mako | 9 ++- source/loader/ur_ldrddi.cpp | 129 +++--------------------------- 2 files changed, 16 insertions(+), 122 deletions(-) diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index c548b14b32..28802f709e 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -145,14 +145,17 @@ namespace ur_loader %else: <%param_replacements={}%> %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): - %if 0 == i: + %if not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': // extract platform's function pointer table auto dditable = reinterpret_cast<${item['obj']}*>( ${item['pointer']}${item['name']} )->dditable; auto ${th.make_pfn_name(n, tags, obj)} = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}; if( nullptr == ${th.make_pfn_name(n, tags, obj)} ) return ${X}_RESULT_ERROR_UNINITIALIZED; + <%break%> %endif + %endfor + %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): %if 'range' in item: <% add_local = True @@ -161,6 +164,7 @@ namespace ur_loader for( size_t i = ${item['range'][0]}; i < ${item['range'][1]}; ++i ) ${item['name']}Local[ i ] = reinterpret_cast<${item['obj']}*>( ${item['name']}[ i ] )->handle; %else: + %if not '_native_object_' in item['obj'] or th.make_func_name(n, 
tags, obj) == 'urPlatformCreateWithNativeHandle': // convert loader handle to platform handle %if item['optional']: ${item['name']} = ( ${item['name']} ) ? reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle : nullptr; @@ -168,6 +172,7 @@ namespace ur_loader ${item['name']} = reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle; %endif %endif + %endif %endfor // forward to device-platform @@ -188,7 +193,7 @@ namespace ur_loader %if item['release']: // release loader handle ${item['factory']}.release( ${item['name']} ); - %else: + %elif not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': try { %if 'range' in item: diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 9d3a0bc695..d1fc45dae8 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -387,14 +387,6 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativePlatform = reinterpret_cast( - ur_native_factory.getInstance(*phNativePlatform, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -708,14 +700,6 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeDevice = reinterpret_cast( - ur_native_factory.getInstance(*phNativeDevice, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -734,17 +718,13 @@ __urdlllocal ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( // extract platform's function pointer table auto dditable = - reinterpret_cast(hNativeDevice)->dditable; + reinterpret_cast(hPlatform)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Device.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert 
loader handle to platform handle - hNativeDevice = - reinterpret_cast(hNativeDevice)->handle; - // convert loader handle to platform handle hPlatform = reinterpret_cast(hPlatform)->handle; @@ -951,14 +931,6 @@ __urdlllocal ur_result_t UR_APICALL urContextGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeContext = reinterpret_cast( - ur_native_factory.getInstance(*phNativeContext, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -979,17 +951,13 @@ __urdlllocal ur_result_t UR_APICALL urContextCreateWithNativeHandle( // extract platform's function pointer table auto dditable = - reinterpret_cast(hNativeContext)->dditable; + reinterpret_cast(*phDevices)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Context.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeContext = - reinterpret_cast(hNativeContext)->handle; - // convert loader handles to platform handles auto phDevicesLocal = std::vector(numDevices); for (size_t i = 0; i < numDevices; ++i) { @@ -1242,14 +1210,6 @@ __urdlllocal ur_result_t UR_APICALL urMemGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeMem = reinterpret_cast( - ur_native_factory.getInstance(*phNativeMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1267,17 +1227,13 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeMem)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnBufferCreateWithNativeHandle = dditable->ur.Mem.pfnBufferCreateWithNativeHandle; if (nullptr == pfnBufferCreateWithNativeHandle) { return 
UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeMem = reinterpret_cast(hNativeMem)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -1317,17 +1273,13 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeMem)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnImageCreateWithNativeHandle = dditable->ur.Mem.pfnImageCreateWithNativeHandle; if (nullptr == pfnImageCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeMem = reinterpret_cast(hNativeMem)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -1563,14 +1515,6 @@ __urdlllocal ur_result_t UR_APICALL urSamplerGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeSampler = reinterpret_cast( - ur_native_factory.getInstance(*phNativeSampler, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1588,18 +1532,13 @@ __urdlllocal ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeSampler)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Sampler.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeSampler = - reinterpret_cast(hNativeSampler)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -2639,14 +2578,6 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetNativeHandle( 
return result; } - try { - // convert platform handle to loader handle - *phNativeProgram = reinterpret_cast( - ur_native_factory.getInstance(*phNativeProgram, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2664,18 +2595,13 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeProgram)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Program.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeProgram = - reinterpret_cast(hNativeProgram)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3123,14 +3049,6 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeKernel = reinterpret_cast( - ur_native_factory.getInstance(*phNativeKernel, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3150,18 +3068,13 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeKernel)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Kernel.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeKernel = - reinterpret_cast(hNativeKernel)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3335,14 +3248,6 @@ __urdlllocal 
ur_result_t UR_APICALL urQueueGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeQueue = reinterpret_cast( - ur_native_factory.getInstance(*phNativeQueue, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3361,17 +3266,13 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeQueue)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Queue.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeQueue = reinterpret_cast(hNativeQueue)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3608,14 +3509,6 @@ __urdlllocal ur_result_t UR_APICALL urEventGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeEvent = reinterpret_cast( - ur_native_factory.getInstance(*phNativeEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3633,17 +3526,13 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeEvent)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Event.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeEvent = reinterpret_cast(hNativeEvent)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; From 
d839bf6b8e12d3b17dc636a23e5defb1037b99c2 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Mon, 11 Dec 2023 12:30:24 +0000 Subject: [PATCH 08/12] Merge pull request #1123 from aarongreig/aaron/usmLocationProps [OpenCL] Add ur_usm_alloc_location_desc struct and handle it in the CL adapter. --- include/ur.py | 20 +++ include/ur_api.h | 23 +++ scripts/core/registry.yml | 3 + scripts/core/usm.yml | 20 +++ source/adapters/opencl/usm.cpp | 158 ++++++++++-------- source/common/ur_params.hpp | 33 ++++ source/loader/ur_libapi.cpp | 3 + source/ur_api.cpp | 3 + test/conformance/usm/usm_adapter_opencl.match | 40 +++++ 9 files changed, 233 insertions(+), 70 deletions(-) create mode 100644 test/conformance/usm/usm_adapter_opencl.match diff --git a/include/ur.py b/include/ur.py index ac57a3cc0d..def6a4213c 100644 --- a/include/ur.py +++ b/include/ur.py @@ -242,6 +242,7 @@ class ur_structure_type_v(IntEnum): KERNEL_EXEC_INFO_PROPERTIES = 31 ## ::ur_kernel_exec_info_properties_t KERNEL_ARG_VALUE_PROPERTIES = 32 ## ::ur_kernel_arg_value_properties_t KERNEL_ARG_LOCAL_PROPERTIES = 33 ## ::ur_kernel_arg_local_properties_t + USM_ALLOC_LOCATION_DESC = 35 ## ::ur_usm_alloc_location_desc_t EXP_COMMAND_BUFFER_DESC = 0x1000 ## ::ur_exp_command_buffer_desc_t EXP_SAMPLER_MIP_PROPERTIES = 0x2000 ## ::ur_exp_sampler_mip_properties_t EXP_INTEROP_MEM_DESC = 0x2001 ## ::ur_exp_interop_mem_desc_t @@ -1530,6 +1531,25 @@ class ur_usm_device_desc_t(Structure): ("flags", ur_usm_device_mem_flags_t) ## [in] device memory allocation flags. ] +############################################################################### +## @brief USM allocation location desc +## +## @details +## - Specify these properties in ::urUSMHostAlloc, ::urUSMDeviceAlloc and +## ::urUSMSharedAlloc via ::ur_usm_desc_t as part of a `pNext` chain. 
+## +## @remarks +## _Analogues_ +## - cl_intel_mem_alloc_buffer_location +class ur_usm_alloc_location_desc_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC + ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure + ("location", c_ulong) ## [in] Identifies the ID of global memory partition to which the memory + ## should be allocated. + ] + ############################################################################### ## @brief USM pool descriptor type class ur_usm_pool_desc_t(Structure): diff --git a/include/ur_api.h b/include/ur_api.h index 2ff218b764..d117e27257 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -251,6 +251,7 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t @@ -3220,6 +3221,25 @@ typedef struct ur_usm_device_desc_t { } ur_usm_device_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief USM allocation location desc +/// +/// @details +/// - Specify these properties in ::urUSMHostAlloc, ::urUSMDeviceAlloc and +/// ::urUSMSharedAlloc via ::ur_usm_desc_t as part of a `pNext` chain. 
+/// +/// @remarks +/// _Analogues_ +/// - cl_intel_mem_alloc_buffer_location +typedef struct ur_usm_alloc_location_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t location; ///< [in] Identifies the ID of global memory partition to which the memory + ///< should be allocated. + +} ur_usm_alloc_location_desc_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief USM pool descriptor type typedef struct ur_usm_pool_desc_t { @@ -3257,6 +3277,7 @@ typedef struct ur_usm_pool_limits_desc_t { /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_host_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -3300,6 +3321,7 @@ urUSMHostAlloc( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -3346,6 +3368,7 @@ urUSMDeviceAlloc( /// allocation. /// - See also ::ur_usm_host_desc_t. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 2d6ce08500..3a13d3c751 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -645,3 +645,6 @@ etors: - name: KERNEL_ARG_LOCAL_PROPERTIES desc: $x_kernel_arg_local_properties_t value: '33' +- name: USM_ALLOC_LOCATION_DESC + desc: $x_usm_alloc_location_desc_t + value: '35' diff --git a/scripts/core/usm.yml b/scripts/core/usm.yml index 0b793d7226..1476eec34a 100644 --- a/scripts/core/usm.yml +++ b/scripts/core/usm.yml @@ -175,6 +175,23 @@ members: desc: "[in] device memory allocation flags." 
--- #-------------------------------------------------------------------------- type: struct +desc: "USM allocation location desc" +details: + - Specify these properties in $xUSMHostAlloc, $xUSMDeviceAlloc and + $xUSMSharedAlloc via $x_usm_desc_t as part of a `pNext` chain. +analogue: + - "cl_intel_mem_alloc_buffer_location" +class: $xUSM +name: $x_usm_alloc_location_desc_t +base: $x_base_desc_t +members: + - type: uint32_t + name: location + desc: > + [in] Identifies the ID of global memory partition to which the memory + should be allocated. +--- #-------------------------------------------------------------------------- +type: struct desc: "USM pool descriptor type" class: $xUSM name: $x_usm_pool_desc_t @@ -212,6 +229,7 @@ details: - "Allocations served from different memory pools must be isolated and must not reside on the same page." - "Any flags/hints passed through pUSMDesc only affect the single allocation." - "See also $x_usm_host_desc_t." + - "See also $x_usm_alloc_location_desc_t." params: - type: $x_context_handle_t name: hContext @@ -253,6 +271,7 @@ details: - "Allocations served from different memory pools must be isolated and must not reside on the same page." - "Any flags/hints passed through pUSMDesc only affect the single allocation." - "See also $x_usm_device_desc_t." + - "See also $x_usm_alloc_location_desc_t." params: - type: $x_context_handle_t name: hContext @@ -298,6 +317,7 @@ details: - "Any flags/hints passed through pUSMDesc only affect the single allocation." - "See also $x_usm_host_desc_t." - "See also $x_usm_device_desc_t." + - "See also $x_usm_alloc_location_desc_t." 
params: - type: $x_context_handle_t name: hContext diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 5d46aec2ef..0d64f23d13 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -10,6 +10,75 @@ #include "common.hpp" +inline cl_mem_alloc_flags_intel +hostDescToClFlags(const ur_usm_host_desc_t &desc) { + cl_mem_alloc_flags_intel allocFlags = 0; + if (desc.flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + allocFlags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; + } + return allocFlags; +} + +inline cl_mem_alloc_flags_intel +deviceDescToClFlags(const ur_usm_device_desc_t &desc) { + cl_mem_alloc_flags_intel allocFlags = 0; + if (desc.flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + allocFlags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; + } + if (desc.flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + allocFlags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; + } + return allocFlags; +} + +ur_result_t +usmDescToCLMemProperties(const ur_base_desc_t *Desc, + std::vector &Properties) { + cl_mem_alloc_flags_intel AllocFlags = 0; + const auto *Next = Desc; + do { + switch (Next->stype) { + case UR_STRUCTURE_TYPE_USM_HOST_DESC: { + auto HostDesc = reinterpret_cast(Next); + if (UR_USM_HOST_MEM_FLAGS_MASK & HostDesc->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + AllocFlags |= hostDescToClFlags(*HostDesc); + break; + } + case UR_STRUCTURE_TYPE_USM_DEVICE_DESC: { + auto DeviceDesc = reinterpret_cast(Next); + if (UR_USM_HOST_MEM_FLAGS_MASK & DeviceDesc->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + AllocFlags |= deviceDescToClFlags(*DeviceDesc); + break; + } + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: { + auto LocationDesc = + reinterpret_cast(Next); + Properties.push_back(CL_MEM_ALLOC_BUFFER_LOCATION_INTEL); + // CL bitfields are cl_ulong + Properties.push_back(static_cast(LocationDesc->location)); + break; + } + default: + return UR_RESULT_ERROR_INVALID_VALUE; + } + + Next = 
Next->pNext ? static_cast(Next->pNext) + : nullptr; + } while (Next); + + if (AllocFlags) { + Properties.push_back(CL_MEM_ALLOC_FLAGS_INTEL); + Properties.push_back(AllocFlags); + } + Properties.push_back(0); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { @@ -17,23 +86,10 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, void *Ptr = nullptr; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; - cl_mem_alloc_flags_intel Flags = 0; - cl_mem_properties_intel Properties[3]; - - if (pUSMDesc && pUSMDesc->pNext && - static_cast(pUSMDesc->pNext)->stype == - UR_STRUCTURE_TYPE_USM_HOST_DESC) { - const auto *HostDesc = - static_cast(pUSMDesc->pNext); - - if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; - } - Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; - Properties[1] = Flags; - Properties[2] = 0; - } else { - Properties[0] = 0; + std::vector AllocProperties; + if (pUSMDesc && pUSMDesc->pNext) { + UR_RETURN_ON_FAILURE(usmDescToCLMemProperties( + static_cast(pUSMDesc->pNext), AllocProperties)); } // First we need to look up the function pointer @@ -47,7 +103,9 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, Properties, size, Alignment, &ClResult); + Ptr = FuncPtr(CLContext, + AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -71,25 +129,10 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, void *Ptr = nullptr; uint32_t Alignment = pUSMDesc ? 
pUSMDesc->align : 0; - cl_mem_alloc_flags_intel Flags = 0; - cl_mem_properties_intel Properties[3]; - if (pUSMDesc && pUSMDesc->pNext && - static_cast(pUSMDesc->pNext)->stype == - UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { - const auto *HostDesc = - static_cast(pUSMDesc->pNext); - - if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; - } - if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { - Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; - } - Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; - Properties[1] = Flags; - Properties[2] = 0; - } else { - Properties[0] = 0; + std::vector AllocProperties; + if (pUSMDesc && pUSMDesc->pNext) { + UR_RETURN_ON_FAILURE(usmDescToCLMemProperties( + static_cast(pUSMDesc->pNext), AllocProperties)); } // First we need to look up the function pointer @@ -104,8 +147,8 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - cl_adapter::cast(Properties), size, - Alignment, &ClResult); + AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -129,35 +172,10 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, void *Ptr = nullptr; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; - cl_mem_alloc_flags_intel Flags = 0; - const auto *NextStruct = - (pUSMDesc ? 
static_cast(pUSMDesc->pNext) - : nullptr); - while (NextStruct) { - if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_HOST_DESC) { - const auto *HostDesc = - reinterpret_cast(NextStruct); - if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; - } - } else if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { - const auto *DevDesc = - reinterpret_cast(NextStruct); - if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; - } - if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { - Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; - } - } - NextStruct = static_cast(NextStruct->pNext); - } - - cl_mem_properties_intel Properties[3] = {CL_MEM_ALLOC_FLAGS_INTEL, Flags, 0}; - - // Passing a flags value of 0 doesn't work, so truncate the properties - if (Flags == 0) { - Properties[0] = 0; + std::vector AllocProperties; + if (pUSMDesc && pUSMDesc->pNext) { + UR_RETURN_ON_FAILURE(usmDescToCLMemProperties( + static_cast(pUSMDesc->pNext), AllocProperties)); } // First we need to look up the function pointer @@ -172,8 +190,8 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - cl_adapter::cast(Properties), size, - Alignment, cl_adapter::cast(&ClResult)); + AllocProperties.empty() ? 
nullptr : AllocProperties.data(), + size, Alignment, cl_adapter::cast(&ClResult)); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } diff --git a/source/common/ur_params.hpp b/source/common/ur_params.hpp index 22b3b3110e..fa67cd4b36 100644 --- a/source/common/ur_params.hpp +++ b/source/common/ur_params.hpp @@ -337,6 +337,8 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_host_desc_t params); inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_device_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, const struct ur_usm_alloc_location_desc_t params); inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_pool_desc_t params); inline std::ostream &operator<<(std::ostream &os, @@ -1299,6 +1301,10 @@ inline std::ostream &operator<<(std::ostream &os, os << "UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES"; break; + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: + os << "UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; break; @@ -1534,6 +1540,12 @@ inline void serializeStruct(std::ostream &os, const void *ptr) { ur_params::serializePtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: { + const ur_usm_alloc_location_desc_t *pstruct = + (const ur_usm_alloc_location_desc_t *)ptr; + ur_params::serializePtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: { const ur_exp_command_buffer_desc_t *pstruct = (const ur_exp_command_buffer_desc_t *)ptr; @@ -7344,6 +7356,27 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } +inline std::ostream & +operator<<(std::ostream &os, const struct ur_usm_alloc_location_desc_t params) { + os << "(struct ur_usm_alloc_location_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur_params::serializeStruct(os, 
(params.pNext)); + + os << ", "; + os << ".location = "; + + os << (params.location); + + os << "}"; + return os; +} inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_pool_desc_t params) { os << "(struct ur_usm_pool_desc_t){"; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index ccf1e1e2cf..0929df8d5e 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -2090,6 +2090,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_host_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -2144,6 +2145,7 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -2201,6 +2203,7 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// allocation. /// - See also ::ur_usm_host_desc_t. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 3e6807d147..ec21a8df85 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -1773,6 +1773,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_host_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -1821,6 +1822,7 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -1872,6 +1874,7 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// allocation. /// - See also ::ur_usm_host_desc_t. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS diff --git a/test/conformance/usm/usm_adapter_opencl.match b/test/conformance/usm/usm_adapter_opencl.match new file mode 100644 index 0000000000..16211ba8e7 --- /dev/null +++ b/test/conformance/usm/usm_adapter_opencl.match @@ -0,0 +1,40 @@ +urUSMDeviceAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullHandleDevice/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullPtrResult/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMAllocInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_USM_ALLOC_INFO_POOL +urUSMHostAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.InvalidNullPtrMem/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMPoolCreateTest.Success/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.SuccessWithFlag/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.InvalidNullPointerPoolDesc/Intel_R__OpenCL___{{.*}}_ 
+urUSMPoolCreateTest.InvalidNullPointerPool/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.InvalidEnumerationFlags/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_USM_POOL_INFO_CONTEXT +urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT +urUSMPoolGetInfoTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidEnumerationProperty/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidSizeZero/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidNullPointerPropValue/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__OpenCL___{{.*}}_ +urUSMPoolDestroyTest.Success/Intel_R__OpenCL___{{.*}}_ +urUSMPoolDestroyTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ +urUSMPoolRetainTest.Success/Intel_R__OpenCL___{{.*}}_ +urUSMPoolRetainTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}}_ +urUSMSharedAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidNullHandleDevice/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidNullPtrMem/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UsePoolEnabled From 5f3b28fa4839fd1f6ba8c6eaf1d042ad33dc6a15 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 13 Dec 2023 12:06:14 +0000 Subject: [PATCH 09/12] Merge pull request #1179 from pbalcer/coverity-issues [L0] coverity fixes --- source/adapters/level_zero/adapter.cpp | 11 ++++------- 
source/adapters/level_zero/device.cpp | 19 ++++++++++--------- source/adapters/level_zero/device.hpp | 3 ++- source/adapters/level_zero/kernel.hpp | 6 ++++-- source/adapters/level_zero/memory.cpp | 10 +++++----- source/adapters/level_zero/memory.hpp | 16 +++++++++------- source/adapters/level_zero/platform.hpp | 4 +++- source/adapters/level_zero/queue.cpp | 6 +++--- source/adapters/level_zero/queue.hpp | 3 ++- 9 files changed, 42 insertions(+), 36 deletions(-) mode change 100755 => 100644 source/adapters/level_zero/queue.cpp diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 67b1b26e7f..5b9f39e743 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -174,17 +174,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { } UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( - [[maybe_unused]] ur_adapter_handle_t - AdapterHandle, ///< [in] handle of the platform instance + ur_adapter_handle_t, ///< [in] handle of the platform instance const char **Message, ///< [out] pointer to a C string where the adapter ///< specific error message will be stored. - [[maybe_unused]] int32_t - *Error ///< [out] pointer to an integer where the adapter specific - ///< error code will be stored. + int32_t *Error ///< [out] pointer to an integer where the adapter specific + ///< error code will be stored. 
) { - AdapterHandle = &Adapter; *Message = ErrorMessage; - Error = &ErrorAdapterNativeCode; + *Error = ErrorAdapterNativeCode; return ErrorMessageCode; } diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index acc7c755f4..05b66e12f4 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -12,6 +12,7 @@ #include "ur_level_zero.hpp" #include #include +#include UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t Platform, ///< [in] handle of the platform instance @@ -353,8 +354,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE)); case UR_DEVICE_INFO_PARTITION_TYPE: { // For root-device there is no partitioning to report. - if (pSize && !Device->isSubDevice()) { - *pSize = 0; + if (Device->SubDeviceCreationProperty == std::nullopt || + !Device->isSubDevice()) { + if (pSize) + *pSize = 0; return UR_RESULT_SUCCESS; } @@ -365,7 +368,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(cslice); } - return ReturnValue(Device->SubDeviceCreationProperty); + return ReturnValue(*Device->SubDeviceCreationProperty); } // Everything under here is not supported yet case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: @@ -1218,16 +1221,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); for (uint32_t I = 0; I < NumDevices; I++) { - Device->SubDevices[I]->SubDeviceCreationProperty = - Properties->pProperties[0]; - if (Properties->pProperties[0].type == - UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + auto prop = Properties->pProperties[0]; + if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { // In case the value is NEXT_PARTITIONABLE, we need to change it to the // chosen domain. This will always be NUMA since that's the only domain // supported by level zero. 
- Device->SubDevices[I]->SubDeviceCreationProperty.value.affinity_domain = - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; } + Device->SubDevices[I]->SubDeviceCreationProperty = prop; OutDevices[I] = Device->SubDevices[I]; // reusing the same pi_device needs to increment the reference count diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 5f34efab44..3b91b70058 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -116,7 +117,7 @@ struct ur_device_handle_t_ : _ur_object { // If this device is a subdevice, this variable contains the properties that // were used during its creation. - ur_device_partition_property_t SubDeviceCreationProperty; + std::optional SubDeviceCreationProperty; // PI platform to which this device belongs. // This field is only set at _ur_device_handle_t creation time, and cannot diff --git a/source/adapters/level_zero/kernel.hpp b/source/adapters/level_zero/kernel.hpp index 64f6e4f939..4ef21ce18b 100644 --- a/source/adapters/level_zero/kernel.hpp +++ b/source/adapters/level_zero/kernel.hpp @@ -16,13 +16,15 @@ struct ur_kernel_handle_t_ : _ur_object { ur_kernel_handle_t_(ze_kernel_handle_t Kernel, bool OwnZeHandle, ur_program_handle_t Program) - : Program{Program}, ZeKernel{Kernel}, SubmissionsCount{0}, MemAllocs{} { + : Context{nullptr}, Program{Program}, ZeKernel{Kernel}, + SubmissionsCount{0}, MemAllocs{} { OwnNativeHandle = OwnZeHandle; } ur_kernel_handle_t_(ze_kernel_handle_t Kernel, bool OwnZeHandle, ur_context_handle_t Context) - : Context{Context}, ZeKernel{Kernel}, SubmissionsCount{0}, MemAllocs{} { + : Context{Context}, Program{nullptr}, ZeKernel{Kernel}, + SubmissionsCount{0}, MemAllocs{} { OwnNativeHandle = OwnZeHandle; } diff --git a/source/adapters/level_zero/memory.cpp 
b/source/adapters/level_zero/memory.cpp index aefa661dac..fa3ef18e47 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -2078,9 +2078,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, auto &Allocation = Allocations[Device]; // Sub-buffers don't maintain own allocations but rely on parent buffer. - if (isSubBuffer()) { - UR_CALL(SubBuffer.Parent->getZeHandle(ZeHandle, AccessMode, Device)); - ZeHandle += SubBuffer.Origin; + if (SubBuffer) { + UR_CALL(SubBuffer->Parent->getZeHandle(ZeHandle, AccessMode, Device)); + ZeHandle += SubBuffer->Origin; // Still store the allocation info in the PI sub-buffer for // getZeHandlePtr to work. At least zeKernelSetArgumentValue needs to // be given a pointer to the allocation handle rather than its value. @@ -2312,7 +2312,7 @@ ur_result_t _ur_buffer::free() { // Buffer constructor _ur_buffer::_ur_buffer(ur_context_handle_t Context, size_t Size, char *HostPtr, bool ImportedHostPtr = false) - : ur_mem_handle_t_(Context), Size(Size), SubBuffer{nullptr, 0} { + : ur_mem_handle_t_(Context), Size(Size) { // We treat integrated devices (physical memory shared with the CPU) // differently from discrete devices (those with distinct memories). 
@@ -2347,7 +2347,7 @@ _ur_buffer::_ur_buffer(ur_context_handle_t Context, ur_device_handle_t Device, _ur_buffer::_ur_buffer(ur_context_handle_t Context, size_t Size, ur_device_handle_t Device, char *ZeMemHandle, bool OwnZeMemHandle) - : ur_mem_handle_t_(Context, Device), Size(Size), SubBuffer{nullptr, 0} { + : ur_mem_handle_t_(Context, Device), Size(Size) { // Device == nullptr means host allocation Allocations[Device].ZeHandle = ZeMemHandle; diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index 54f9a84e6b..8efd5b136e 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -84,7 +85,8 @@ struct ur_mem_handle_t_ : _ur_object { virtual ~ur_mem_handle_t_() = default; protected: - ur_mem_handle_t_(ur_context_handle_t Context) : UrContext{Context} {} + ur_mem_handle_t_(ur_context_handle_t Context) + : UrContext{Context}, UrDevice{nullptr} {} ur_mem_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device) : UrContext{Context}, UrDevice(Device) {} @@ -101,7 +103,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { // Sub-buffer constructor _ur_buffer(_ur_buffer *Parent, size_t Origin, size_t Size) : ur_mem_handle_t_(Parent->UrContext), - Size(Size), SubBuffer{Parent, Origin} {} + Size(Size), SubBuffer{{Parent, Origin}} {} // Interop-buffer constructor _ur_buffer(ur_context_handle_t Context, size_t Size, @@ -121,8 +123,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { ur_device_handle_t Device = nullptr) override; bool isImage() const override { return false; } - - bool isSubBuffer() const { return SubBuffer.Parent != nullptr; } + bool isSubBuffer() const { return SubBuffer != std::nullopt; } // Frees all allocations made for the buffer. 
ur_result_t free(); @@ -174,10 +175,11 @@ struct _ur_buffer final : ur_mem_handle_t_ { size_t Size; size_t getAlignment() const; - struct { + struct SubBuffer_t { _ur_buffer *Parent; - size_t Origin; // only valid if Parent != nullptr - } SubBuffer; + size_t Origin; + }; + std::optional SubBuffer; }; struct _ur_image final : ur_mem_handle_t_ { diff --git a/source/adapters/level_zero/platform.hpp b/source/adapters/level_zero/platform.hpp index f7b9576189..86aa4ec745 100644 --- a/source/adapters/level_zero/platform.hpp +++ b/source/adapters/level_zero/platform.hpp @@ -10,11 +10,13 @@ #pragma once #include "common.hpp" +#include "ze_api.h" struct ur_device_handle_t_; struct ur_platform_handle_t_ : public _ur_platform { - ur_platform_handle_t_(ze_driver_handle_t Driver) : ZeDriver{Driver} {} + ur_platform_handle_t_(ze_driver_handle_t Driver) + : ZeDriver{Driver}, ZeApiVersion{ZE_API_VERSION_CURRENT} {} // Performs initialization of a newly constructed PI platform. ur_result_t initialize(); diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp old mode 100755 new mode 100644 index 994f595a5d..f07e0df675 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -219,7 +219,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( if (ImmCmdList == Queue->CommandListMap.end()) continue; - auto EventList = ImmCmdList->second.EventList; + const auto &EventList = ImmCmdList->second.EventList; for (auto It = EventList.crbegin(); It != EventList.crend(); It++) { ze_result_t ZeResult = ZE_CALL_NOCHECK(zeEventQueryStatus, ((*It)->ZeEvent)); @@ -391,11 +391,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( // At this point only the thread creating the queue will have associated // command-lists. Other threads have not accessed the queue yet. So we can // only warmup the initial thread's command-lists. 
- auto QueueGroup = Q->ComputeQueueGroupsByTID.get(); + const auto &QueueGroup = Q->ComputeQueueGroupsByTID.get(); UR_CALL(warmupQueueGroup(false, QueueGroup.UpperIndex - QueueGroup.LowerIndex + 1)); if (Q->useCopyEngine()) { - auto QueueGroup = Q->CopyQueueGroupsByTID.get(); + const auto &QueueGroup = Q->CopyQueueGroupsByTID.get(); UR_CALL(warmupQueueGroup(true, QueueGroup.UpperIndex - QueueGroup.LowerIndex + 1)); } diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 306cec5416..88281925ce 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -424,7 +424,8 @@ struct ur_queue_handle_t_ : _ur_object { // checked. Otherwise, the OpenCommandList containing compute commands is // checked. bool hasOpenCommandList(bool IsCopy) const { - auto CommandBatch = (IsCopy) ? CopyCommandBatch : ComputeCommandBatch; + const auto &CommandBatch = + (IsCopy) ? CopyCommandBatch : ComputeCommandBatch; return CommandBatch.OpenCommandList != CommandListMap.end(); } From 3748aaf0c6b5c90688880f3a9fb0bcc738c72cdf Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 14 Dec 2023 12:03:01 +0000 Subject: [PATCH 10/12] Merge pull request #1105 from jandres742/fixtestusm [UR][L0] Add several fixes to L0 adapter for test-usm --- source/adapters/level_zero/context.hpp | 3 + source/adapters/level_zero/usm.cpp | 94 ++++++++++++++++--- source/adapters/level_zero/usm.hpp | 2 + .../usm/usm_adapter_level_zero.match | 13 +-- 4 files changed, 88 insertions(+), 24 deletions(-) diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 94935ee59e..96935d470e 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -115,6 +115,9 @@ struct ur_context_handle_t_ : _ur_object { SharedReadOnlyMemProxyPools; umf::pool_unique_handle_t HostMemProxyPool; + // Map associating pools created with urUsmPoolCreate and internal pools + std::list 
UsmPoolHandles{}; + // We need to store all memory allocations in the context because there could // be kernels with indirect access. Kernels with indirect access start to // reference all existing memory allocations at the time when they are diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index c6d98855e7..d2dfc9b37d 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -187,8 +187,15 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, ZeDesc.pNext = &RelaxedDesc; } - ZE2UR_CALL(zeMemAllocDevice, (Context->ZeContext, &ZeDesc, Size, Alignment, - Device->ZeDevice, ResultPtr)); + ze_result_t ZeResult = ZE_CALL_NOCHECK( + zeMemAllocDevice, (Context->ZeContext, &ZeDesc, Size, Alignment, + Device->ZeDevice, ResultPtr)); + if (ZeResult != ZE_RESULT_SUCCESS) { + if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(ZeResult); + } UR_ASSERT(Alignment == 0 || reinterpret_cast(*ResultPtr) % Alignment == 0, @@ -226,8 +233,15 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, ZeDevDesc.pNext = &RelaxedDesc; } - ZE2UR_CALL(zeMemAllocShared, (Context->ZeContext, &ZeDevDesc, &ZeHostDesc, - Size, Alignment, Device->ZeDevice, ResultPtr)); + ze_result_t ZeResult = ZE_CALL_NOCHECK( + zeMemAllocShared, (Context->ZeContext, &ZeDevDesc, &ZeHostDesc, Size, + Alignment, Device->ZeDevice, ResultPtr)); + if (ZeResult != ZE_RESULT_SUCCESS) { + if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(ZeResult); + } UR_ASSERT(Alignment == 0 || reinterpret_cast(*ResultPtr) % Alignment == 0, @@ -254,8 +268,15 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, // TODO: translate PI properties to Level Zero flags ZeStruct ZeHostDesc; ZeHostDesc.flags = 0; - ZE2UR_CALL(zeMemAllocHost, - (Context->ZeContext, &ZeHostDesc, Size, Alignment, ResultPtr)); + ze_result_t ZeResult = + 
ZE_CALL_NOCHECK(zeMemAllocHost, (Context->ZeContext, &ZeHostDesc, Size, + Alignment, ResultPtr)); + if (ZeResult != ZE_RESULT_SUCCESS) { + if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(ZeResult); + } UR_ASSERT(Alignment == 0 || reinterpret_cast(*ResultPtr) % Alignment == 0, @@ -599,6 +620,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( ZE2UR_CALL(zeMemGetAddressRange, (Context->ZeContext, Ptr, nullptr, &Size)); return ReturnValue(Size); } + case UR_USM_ALLOC_INFO_POOL: { + auto UMFPool = umfPoolByPtr(Ptr); + if (!UMFPool) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + + std::shared_lock ContextLock(Context->Mutex); + + auto SearchMatchingPool = + [](std::unordered_map + &PoolMap, + umf_memory_pool_handle_t UMFPool) { + for (auto &PoolPair : PoolMap) { + if (PoolPair.second.get() == UMFPool) { + return true; + } + } + return false; + }; + + for (auto &Pool : Context->UsmPoolHandles) { + if (SearchMatchingPool(Pool->DeviceMemPools, UMFPool)) { + return ReturnValue(Pool); + } + if (SearchMatchingPool(Pool->SharedMemPools, UMFPool)) { + return ReturnValue(Pool); + } + if (Pool->HostMemPool.get() == UMFPool) { + return ReturnValue(Pool); + } + } + + return UR_RESULT_ERROR_INVALID_VALUE; + } default: urPrint("urUSMGetMemAllocInfo: unsupported ParamName\n"); return UR_RESULT_ERROR_INVALID_VALUE; @@ -748,6 +803,7 @@ ur_result_t L0HostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) { + this->Context = Context; zeroInit = static_cast(PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK); @@ -831,6 +887,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( try { *Pool = reinterpret_cast( new ur_usm_pool_handle_t_(Context, PoolDesc)); + + std::shared_lock ContextLock(Context->Mutex); + Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); + } catch 
(const UsmAllocationException &Ex) { return Ex.getError(); } @@ -848,6 +908,8 @@ ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool ) { if (Pool->RefCount.decrementAndTest()) { + std::shared_lock ContextLock(Pool->Context->Mutex); + Pool->Context->UsmPoolHandles.remove(Pool); delete Pool; } return UR_RESULT_SUCCESS; @@ -861,13 +923,19 @@ ur_result_t urUSMPoolGetInfo( ///< property size_t *PropSizeRet ///< [out] size in bytes returned in pool property value ) { - std::ignore = Pool; - std::ignore = PropName; - std::ignore = PropSize; - std::ignore = PropValue; - std::ignore = PropSizeRet; - urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + + switch (PropName) { + case UR_USM_POOL_INFO_REFERENCE_COUNT: { + return ReturnValue(Pool->RefCount.load()); + } + case UR_USM_POOL_INFO_CONTEXT: { + return ReturnValue(Pool->Context); + } + default: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + } } // If indirect access tracking is not enabled then this functions just performs diff --git a/source/adapters/level_zero/usm.hpp b/source/adapters/level_zero/usm.hpp index 01e215c578..958fca9354 100644 --- a/source/adapters/level_zero/usm.hpp +++ b/source/adapters/level_zero/usm.hpp @@ -29,6 +29,8 @@ struct ur_usm_pool_handle_t_ : _ur_object { SharedReadOnlyMemPools; umf::pool_unique_handle_t HostMemPool; + ur_context_handle_t Context{}; + ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc); }; diff --git a/test/conformance/usm/usm_adapter_level_zero.match b/test/conformance/usm/usm_adapter_level_zero.match index bf45b83ec2..c036fa785c 100644 --- a/test/conformance/usm/usm_adapter_level_zero.match +++ b/test/conformance/usm/usm_adapter_level_zero.match @@ -1,11 +1,2 @@ -urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled 
-urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_POOL -urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_CONTEXT -urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMPoolRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled From 7e098bc87951eb2606135f70edbaa05c6c3e4b7c Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 14 Dec 2023 17:54:55 +0000 Subject: [PATCH 11/12] Merge pull request #1061 from jandres742/updateL0loader [UR][L0] Upgrade L0 loader to v1.15.1 --- source/adapters/level_zero/CMakeLists.txt | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 223692e109..7b24223b95 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -22,8 +22,19 @@ endif() if (NOT DEFINED 
LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) message(STATUS "Download Level Zero loader and headers from github.com") + # Workaround warnings/errors for Level Zero build + set(CMAKE_CXX_FLAGS_BAK "${CMAKE_CXX_FLAGS}") + if (UNIX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-but-set-variable") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pedantic") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-truncation") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++98-compat-extra-semi") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-warning-option") + endif() + set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") - set(LEVEL_ZERO_LOADER_TAG v1.11.0) + set(LEVEL_ZERO_LOADER_TAG v1.15.1) # Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104 set(CMAKE_INCLUDE_CURRENT_DIR OFF) @@ -42,6 +53,9 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) FetchContent_MakeAvailable(level-zero-loader) FetchContent_GetProperties(level-zero-loader) + # Restore original flags + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_BAK}") + target_compile_options(ze_loader PRIVATE $<$,GNU;Clang;Intel;IntelLLVM>:-Wno-error> $<$:/WX- /UUNICODE> From 6490baac75448a879595a303cc23421a6ca33195 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Fri, 15 Dec 2023 10:09:02 +0000 Subject: [PATCH 12/12] Merge pull request #1185 from aarongreig/aaron/fixCoverityCudaCL Fix coverity issues in OpenCL, cuda and hip adapters. 
--- source/adapters/cuda/command_buffer.cpp | 4 +-- source/adapters/cuda/command_buffer.hpp | 6 ++--- source/adapters/cuda/device.cpp | 15 ++++++----- source/adapters/cuda/event.cpp | 29 +++++++------------- source/adapters/cuda/event.hpp | 18 ++++++++++--- source/adapters/cuda/image.cpp | 2 +- source/adapters/cuda/memory.hpp | 4 +-- source/adapters/cuda/program.cpp | 3 ++- source/adapters/cuda/sampler.cpp | 2 +- source/adapters/hip/device.hpp | 2 +- source/adapters/hip/enqueue.cpp | 35 +++++++------------------ source/adapters/hip/event.cpp | 22 +++++++++------- source/adapters/hip/event.hpp | 5 ++-- source/adapters/hip/memory.hpp | 4 +-- source/adapters/hip/program.cpp | 1 - source/adapters/hip/sampler.cpp | 1 - source/adapters/opencl/program.cpp | 11 ++++---- 17 files changed, 77 insertions(+), 87 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index f25e96a732..59e433b602 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -21,8 +21,8 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t hDevice) - : Context(hContext), - Device(hDevice), CudaGraph{nullptr}, CudaGraphExec{nullptr}, RefCount{1} { + : Context(hContext), Device(hDevice), CudaGraph{nullptr}, + CudaGraphExec{nullptr}, RefCount{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 4ceab42062..18264410c4 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -184,7 +184,7 @@ struct ur_exp_command_buffer_handle_t_ { void RegisterSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, std::shared_ptr CuNode) { - SyncPoints[SyncPoint] = CuNode; + SyncPoints[SyncPoint] = std::move(CuNode); NextSyncPoint++; } @@ -193,12 +193,12 @@ struct ur_exp_command_buffer_handle_t_ 
{ } // Helper to register next sync point - // @param CuNode Node to register as next sycn point + // @param CuNode Node to register as next sync point // @return Pointer to the sync that registers the Node ur_exp_command_buffer_sync_point_t AddSyncPoint(std::shared_ptr CuNode) { ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint; - RegisterSyncPoint(SyncPoint, CuNode); + RegisterSyncPoint(SyncPoint, std::move(CuNode)); return SyncPoint; } diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index a4877236ae..8d95ad05e8 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1143,17 +1143,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( if (Result != UR_RESULT_SUCCESS) return Result; - ur_platform_handle_t *Plat = static_cast( - malloc(NumPlatforms * sizeof(ur_platform_handle_t))); - Result = urPlatformGet(&AdapterHandle, 1, NumPlatforms, Plat, nullptr); + std::vector Platforms(NumPlatforms); + + Result = + urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(), nullptr); if (Result != UR_RESULT_SUCCESS) return Result; // Iterate through platforms to find device that matches nativeHandle - for (uint32_t j = 0; j < NumPlatforms; ++j) { - auto SearchRes = - std::find_if(begin(Plat[j]->Devices), end(Plat[j]->Devices), IsDevice); - if (SearchRes != end(Plat[j]->Devices)) { + for (const auto Platform : Platforms) { + auto SearchRes = std::find_if(std::begin(Platform->Devices), + std::end(Platform->Devices), IsDevice); + if (SearchRes != end(Platform->Devices)) { *phDevice = static_cast((*SearchRes).get()); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/cuda/event.cpp b/source/adapters/cuda/event.cpp index 6137f0ecce..2cbfcbc39b 100644 --- a/source/adapters/cuda/event.cpp +++ b/source/adapters/cuda/event.cpp @@ -9,7 +9,6 @@ //===----------------------------------------------------------------------===// #include "event.hpp" -#include "common.hpp" #include 
"context.hpp" #include "device.hpp" #include "queue.hpp" @@ -19,26 +18,15 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, ur_context_handle_t Context, - ur_queue_handle_t Queue, CUstream Stream, + ur_queue_handle_t Queue, + native_type EvEnd, native_type EvQueued, + native_type EvStart, CUstream Stream, uint32_t StreamToken) : CommandType{Type}, RefCount{1}, HasOwnership{true}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{StreamToken}, EvEnd{nullptr}, EvStart{nullptr}, - EvQueued{nullptr}, Queue{Queue}, Stream{Stream}, Context{Context} { - - bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; - - UR_CHECK_ERROR(cuEventCreate( - &EvEnd, ProfilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING)); - - if (ProfilingEnabled) { - UR_CHECK_ERROR(cuEventCreate(&EvQueued, CU_EVENT_DEFAULT)); - UR_CHECK_ERROR(cuEventCreate(&EvStart, CU_EVENT_DEFAULT)); - } - - if (Queue != nullptr) { - urQueueRetain(Queue); - } + StreamToken{StreamToken}, EventID{0}, EvEnd{EvEnd}, EvStart{EvStart}, + EvQueued{EvQueued}, Queue{Queue}, Stream{Stream}, Context{Context} { + urQueueRetain(Queue); urContextRetain(Context); } @@ -46,8 +34,9 @@ ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, CUevent EventNative) : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{std::numeric_limits::max()}, EvEnd{EventNative}, - EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, Context{Context} { + StreamToken{std::numeric_limits::max()}, EventID{0}, + EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, + Stream{nullptr}, Context{Context} { urContextRetain(Context); } diff --git a/source/adapters/cuda/event.hpp b/source/adapters/cuda/event.hpp index 3e5f466716..390fd7833a 100644 --- a/source/adapters/cuda/event.hpp +++ b/source/adapters/cuda/event.hpp @@ -12,6 +12,7 @@ #include #include +#include "common.hpp" 
#include "queue.hpp" /// UR Event mapping to CUevent @@ -82,8 +83,18 @@ struct ur_event_handle_t_ { static ur_event_handle_t makeNative(ur_command_t Type, ur_queue_handle_t Queue, CUstream Stream, uint32_t StreamToken = std::numeric_limits::max()) { - return new ur_event_handle_t_(Type, Queue->getContext(), Queue, Stream, - StreamToken); + const bool ProfilingEnabled = + Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; + native_type EvEnd = nullptr, EvQueued = nullptr, EvStart = nullptr; + UR_CHECK_ERROR(cuEventCreate( + &EvEnd, ProfilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING)); + + if (ProfilingEnabled) { + UR_CHECK_ERROR(cuEventCreate(&EvQueued, CU_EVENT_DEFAULT)); + UR_CHECK_ERROR(cuEventCreate(&EvStart, CU_EVENT_DEFAULT)); + } + return new ur_event_handle_t_(Type, Queue->getContext(), Queue, EvEnd, + EvQueued, EvStart, Stream, StreamToken); } static ur_event_handle_t makeWithNative(ur_context_handle_t context, @@ -99,7 +110,8 @@ struct ur_event_handle_t_ { // This constructor is private to force programmers to use the makeNative / // make_user static members in order to create a pi_event for CUDA. 
ur_event_handle_t_(ur_command_t Type, ur_context_handle_t Context, - ur_queue_handle_t Queue, CUstream Stream, + ur_queue_handle_t Queue, native_type EvEnd, + native_type EvQueued, native_type EvStart, CUstream Stream, uint32_t StreamToken); // This constructor is private to force programmers to use the diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index 7ec53bd8bc..1f336dd2d7 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -234,7 +234,7 @@ cudaToUrImageChannelFormat(CUarray_format cuda_format, ur_result_t urTextureCreate(ur_sampler_handle_t hSampler, const ur_image_desc_t *pImageDesc, - CUDA_RESOURCE_DESC ResourceDesc, + const CUDA_RESOURCE_DESC &ResourceDesc, ur_exp_image_handle_t *phRetImage) { try { diff --git a/source/adapters/cuda/memory.hpp b/source/adapters/cuda/memory.hpp index e60e415d39..3a9a7e2d6b 100644 --- a/source/adapters/cuda/memory.hpp +++ b/source/adapters/cuda/memory.hpp @@ -190,7 +190,7 @@ struct ur_mem_handle_t_ { /// Constructs the UR allocation for an unsampled image object ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, CUsurfObject Surf, ur_mem_type_t ImageType) - : Context{Context}, RefCount{1}, MemType{Type::Surface}, + : Context{Context}, RefCount{1}, MemType{Type::Surface}, MemFlags{0}, Mem{ImageMem{Array, (void *)Surf, ImageType, nullptr}} { urContextRetain(Context); } @@ -198,7 +198,7 @@ struct ur_mem_handle_t_ { /// Constructs the UR allocation for a sampled image object ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, CUtexObject Tex, ur_sampler_handle_t Sampler, ur_mem_type_t ImageType) - : Context{Context}, RefCount{1}, MemType{Type::Texture}, + : Context{Context}, RefCount{1}, MemType{Type::Texture}, MemFlags{0}, Mem{ImageMem{Array, (void *)Tex, ImageType, Sampler}} { urContextRetain(Context); } diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index 6660c20d06..9b7959eb85 100644 --- 
a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -137,7 +137,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) { if (!this->BuildOptions.empty()) { unsigned int MaxRegs; - bool Valid = getMaxRegistersJitOptionValue(BuildOptions, MaxRegs); + const bool Valid = + getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs); if (Valid) { Options.push_back(CU_JIT_MAX_REGISTERS); OptionVals.push_back(reinterpret_cast(MaxRegs)); diff --git a/source/adapters/cuda/sampler.cpp b/source/adapters/cuda/sampler.cpp index 5c6b91de65..0e1305da23 100644 --- a/source/adapters/cuda/sampler.cpp +++ b/source/adapters/cuda/sampler.cpp @@ -17,7 +17,7 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc, std::unique_ptr Sampler{ new ur_sampler_handle_t_(hContext)}; - if (pDesc && pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) { + if (pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) { Sampler->Props |= pDesc->normalizedCoords; Sampler->Props |= pDesc->filterMode << 1; Sampler->Props |= pDesc->addressingMode << 2; diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index 83cc2ee954..cc06fac4aa 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -32,7 +32,7 @@ struct ur_device_handle_t_ { : HIPDevice(HipDevice), RefCount{1}, Platform(Platform), HIPContext(Context) {} - ~ur_device_handle_t_() { + ~ur_device_handle_t_() noexcept(false) { UR_CHECK_ERROR(hipDevicePrimaryCtxRelease(HIPDevice)); } diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 1a73618c77..ebebcc27b5 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -885,15 +885,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ur_result_t Result = UR_RESULT_SUCCESS; - try { ScopedContext Active(hQueue->getDevice()); 
hipStream_t HIPStream = hQueue->getNextTransferStream(); if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); } hipArray *Array = std::get(hImage->Mem).getArray(); @@ -920,13 +918,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( UR_CHECK_ERROR(RetImplEvent->start()); } - Result = commonEnqueueMemImageNDCopy(HIPStream, ImgType, AdjustedRegion, - Array, hipMemoryTypeArray, SrcOffset, - pDst, hipMemoryTypeHost, nullptr); - - if (Result != UR_RESULT_SUCCESS) { - return Result; - } + UR_CHECK_ERROR(commonEnqueueMemImageNDCopy( + HIPStream, ImgType, AdjustedRegion, Array, hipMemoryTypeArray, + SrcOffset, pDst, hipMemoryTypeHost, nullptr)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -942,7 +936,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( return UR_RESULT_ERROR_UNKNOWN; } return UR_RESULT_SUCCESS; - return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( @@ -953,15 +946,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ur_result_t Result = UR_RESULT_SUCCESS; - try { ScopedContext Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); } hipArray *Array = std::get(hImage->Mem).getArray(); @@ -988,13 +979,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( UR_CHECK_ERROR(RetImplEvent->start()); } - Result = commonEnqueueMemImageNDCopy(HIPStream, ImgType, AdjustedRegion, - pSrc, hipMemoryTypeHost, nullptr, - Array, hipMemoryTypeArray, DstOffset); - - if (Result != UR_RESULT_SUCCESS) { - return 
Result; - } + UR_CHECK_ERROR(commonEnqueueMemImageNDCopy( + HIPStream, ImgType, AdjustedRegion, pSrc, hipMemoryTypeHost, nullptr, + Array, hipMemoryTypeArray, DstOffset)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -1007,8 +994,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( } return UR_RESULT_SUCCESS; - - return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( diff --git a/source/adapters/hip/event.cpp b/source/adapters/hip/event.cpp index 4871335c9f..7d37d8d9e3 100644 --- a/source/adapters/hip/event.cpp +++ b/source/adapters/hip/event.cpp @@ -19,7 +19,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, hipStream_t Stream, uint32_t StreamToken) : CommandType{Type}, RefCount{1}, HasOwnership{true}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{StreamToken}, EvEnd{nullptr}, EvStart{nullptr}, + StreamToken{StreamToken}, EventId{0}, EvEnd{nullptr}, EvStart{nullptr}, EvQueued{nullptr}, Queue{Queue}, Stream{Stream}, Context{Context} { bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; @@ -32,9 +32,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, UR_CHECK_ERROR(hipEventCreateWithFlags(&EvStart, hipEventDefault)); } - if (Queue != nullptr) { - urQueueRetain(Queue); - } + urQueueRetain(Queue); urContextRetain(Context); } @@ -42,8 +40,9 @@ ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, hipEvent_t EventNative) : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{std::numeric_limits::max()}, EvEnd{EventNative}, - EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, Context{Context} { + StreamToken{std::numeric_limits::max()}, EventId{0}, + EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, + Stream{nullptr}, Context{Context} { urContextRetain(Context); } @@ -72,7 +71,7 @@ ur_result_t ur_event_handle_t_::start() { 
return Result; } -bool ur_event_handle_t_::isCompleted() const noexcept { +bool ur_event_handle_t_::isCompleted() const { if (!IsRecorded) { return false; } @@ -225,8 +224,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, return ReturnValue(hEvent->getCommandType()); case UR_EVENT_INFO_REFERENCE_COUNT: return ReturnValue(hEvent->getReferenceCount()); - case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: - return ReturnValue(hEvent->getExecutionStatus()); + case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: { + try { + return ReturnValue(hEvent->getExecutionStatus()); + } catch (ur_result_t Error) { + return Error; + } + } case UR_EVENT_INFO_CONTEXT: return ReturnValue(hEvent->getContext()); default: diff --git a/source/adapters/hip/event.hpp b/source/adapters/hip/event.hpp index bfa05b59d7..0cf363d263 100644 --- a/source/adapters/hip/event.hpp +++ b/source/adapters/hip/event.hpp @@ -40,10 +40,9 @@ struct ur_event_handle_t_ { bool isStarted() const noexcept { return IsStarted; } - bool isCompleted() const noexcept; - - uint32_t getExecutionStatus() const noexcept { + bool isCompleted() const; + uint32_t getExecutionStatus() const { if (!isRecorded()) { return UR_EVENT_STATUS_SUBMITTED; } diff --git a/source/adapters/hip/memory.hpp b/source/adapters/hip/memory.hpp index 2732b22a6e..6b3ba73789 100644 --- a/source/adapters/hip/memory.hpp +++ b/source/adapters/hip/memory.hpp @@ -173,7 +173,7 @@ struct ur_mem_handle_t_ { urContextRetain(Context); } - ~ur_mem_handle_t_() { + ~ur_mem_handle_t_() noexcept(false) { if (isBuffer() && isSubBuffer()) { urMemRelease(std::get(Mem).Parent); return; @@ -183,7 +183,7 @@ struct ur_mem_handle_t_ { bool isBuffer() const noexcept { return MemType == Type::Buffer; } - bool isSubBuffer() const noexcept { + bool isSubBuffer() const { return (isBuffer() && (std::get(Mem).Parent != nullptr)); } diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 2c71c53208..fa38384e62 100644 --- 
a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -421,7 +421,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, const uint8_t *pBinary, const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { - UR_ASSERT(pBinary != nullptr && size != 0, UR_RESULT_ERROR_INVALID_BINARY); UR_ASSERT(hContext->getDevice()->get() == hDevice->get(), UR_RESULT_ERROR_INVALID_CONTEXT); diff --git a/source/adapters/hip/sampler.cpp b/source/adapters/hip/sampler.cpp index 5a177d6a9f..1ee1996164 100644 --- a/source/adapters/hip/sampler.cpp +++ b/source/adapters/hip/sampler.cpp @@ -58,7 +58,6 @@ ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, default: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - return {}; } ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler) { diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index c6e11fe06c..f628c8152b 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -347,12 +347,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( &Ctx, &RetSize)); std::unique_ptr> DevicesInCtx; - cl_adapter::getDevicesFromContext(cl_adapter::cast(Ctx), - DevicesInCtx); + UR_RETURN_ON_FAILURE(cl_adapter::getDevicesFromContext( + cl_adapter::cast(Ctx), DevicesInCtx)); cl_platform_id CurPlatform; - clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, nullptr); + CL_RETURN_ON_FAILURE(clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &CurPlatform, + nullptr)); oclv::OpenCLVersion PlatVer; cl_adapter::getPlatformVersion(CurPlatform, PlatVer); @@ -364,7 +365,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( for (cl_device_id Dev : *DevicesInCtx) { oclv::OpenCLVersion DevVer; - cl_adapter::getDeviceVersion(Dev, DevVer); + 
UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(Dev, DevVer)); if (DevVer < oclv::V2_2) { UseExtensionLookup = true;