From 249e57ba3f78d0fe7fff228492dfe3ddff133469 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 19 May 2020 01:50:56 +0300 Subject: [PATCH 1/2] [SYCL] Untie PI functions from OpenCL * Introduce pi_buffer_region type and use it instead of cl_buffer_region * Introduce pi_fp_capabilities enum and use it in PI functions * Fix type of mem advice parameter in piextUSMEnqueueMemAdvise * Use pi_event_info instead of cl_event_info * Extend pi_device_info with subgroup properties * Extend pi_device_info with enum values to query subgroup information and IL version; these values are going to be used by the Level Zero plugin Signed-off-by: Artur Gainullin --- sycl/include/CL/sycl/detail/pi.h | 34 +++++++++++++++++++++---- sycl/include/CL/sycl/info/info_desc.hpp | 10 +++++--- sycl/plugins/cuda/pi_cuda.cpp | 14 +++++----- sycl/plugins/opencl/pi_opencl.cpp | 3 ++- sycl/source/detail/memory_manager.cpp | 3 +-- 5 files changed, 45 insertions(+), 19 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 301d3c9d8f85a..73bf9cd9b6c11 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -224,6 +224,7 @@ typedef enum { PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, + PI_DEVICE_INFO_IL_VERSION = CL_DEVICE_IL_VERSION_KHR, PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, @@ -241,6 +242,10 @@ typedef enum { PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, + PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS = CL_DEVICE_MAX_NUM_SUB_GROUPS, + PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = + CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, + PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = CL_DEVICE_SUB_GROUP_SIZES_INTEL, 
PI_DEVICE_INFO_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, PI_DEVICE_INFO_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = @@ -299,6 +304,16 @@ typedef enum { PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE } _pi_kernel_group_info; +typedef enum { + PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT, + PI_FP_ROUND_TO_NEAREST = CL_FP_ROUND_TO_NEAREST, + PI_FP_ROUND_TO_ZERO = CL_FP_ROUND_TO_ZERO, + PI_FP_ROUND_TO_INF = CL_FP_ROUND_TO_INF, + PI_FP_INF_NAN = CL_FP_INF_NAN, + PI_FP_DENORM = CL_FP_DENORM, + PI_FP_FMA = CL_FP_FMA +} _pi_fp_capabilities; + typedef enum { PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT, PI_IMAGE_INFO_ELEMENT_SIZE = CL_IMAGE_ELEMENT_SIZE, @@ -512,6 +527,7 @@ using pi_image_info = _pi_image_info; using pi_kernel_info = _pi_kernel_info; using pi_kernel_group_info = _pi_kernel_group_info; using pi_kernel_sub_group_info = _pi_kernel_sub_group_info; +using pi_fp_capabilities = _pi_fp_capabilities; using pi_event_info = _pi_event_info; using pi_command_type = _pi_command_type; using pi_mem_type = _pi_mem_type; @@ -678,6 +694,13 @@ struct pi_device_binary_struct { }; using pi_device_binary = pi_device_binary_struct *; +// pi_buffer_region structure repeats cl_buffer_region +struct pi_buffer_region_struct { + size_t origin; + size_t size; +}; +using pi_buffer_region_struct *pi_buffer_region; + // Offload binaries descriptor version supported by this library. 
static const uint16_t PI_DEVICE_BINARIES_VERSION = 1; @@ -1118,10 +1141,10 @@ __SYCL_EXPORT pi_result piKernelSetExecInfo(pi_kernel kernel, // __SYCL_EXPORT pi_result piEventCreate(pi_context context, pi_event *ret_event); -__SYCL_EXPORT pi_result piEventGetInfo( - pi_event event, - cl_event_info param_name, // TODO: untie from OpenCL - size_t param_value_size, void *param_value, size_t *param_value_size_ret); +__SYCL_EXPORT pi_result piEventGetInfo(pi_event event, pi_event_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); __SYCL_EXPORT pi_result piEventGetProfilingInfo(pi_event event, pi_profiling_info param_name, @@ -1439,7 +1462,8 @@ __SYCL_EXPORT pi_result piextUSMEnqueuePrefetch( // USM memadvise API to govern behavior of automatic migration mechanisms __SYCL_EXPORT pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, - int advice, pi_event *event); + pi_mem_advice advice, + pi_event *event); /// API to query information about USM allocated pointers /// Valid Queries: diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index 438e0ffeb21ea..011d633356670 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -117,6 +117,8 @@ enum class device : cl_device_info { partition_affinity_domains = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, partition_type_affinity_domain = CL_DEVICE_PARTITION_TYPE, reference_count = CL_DEVICE_REFERENCE_COUNT, + il_version = + CL_DEVICE_IL_VERSION_KHR, // Same as CL_DEVICE_IL_VERSION for >=OpenCL 2.1 max_num_sub_groups = CL_DEVICE_MAX_NUM_SUB_GROUPS, sub_group_independent_forward_progress = CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, @@ -124,11 +126,11 @@ enum class device : cl_device_info { partition_type_property, kernel_kernel_pipe_support, // USM - usm_device_allocations = PI_USM_DEVICE_SUPPORT, - usm_host_allocations = PI_USM_HOST_SUPPORT, - usm_shared_allocations = 
PI_USM_SINGLE_SHARED_SUPPORT, + usm_device_allocations = PI_USM_DEVICE_SUPPORT, + usm_host_allocations = PI_USM_HOST_SUPPORT, + usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT, usm_restricted_shared_allocations = PI_USM_CROSS_SHARED_SUPPORT, - usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT + usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT }; enum class device_type : pi_uint64 { diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 8a4d9540334a4..c650a478b5ca9 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1029,15 +1029,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_SINGLE_FP_CONFIG: { // TODO: is this config consistent across all NVIDIA GPUs? - auto config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST | - CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA | - CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; + auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST | + PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA | + PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; return getInfo(param_value_size, param_value, param_value_size_ret, config); } case PI_DEVICE_INFO_DOUBLE_FP_CONFIG: { // TODO: is this config consistent across all NVIDIA GPUs? 
- auto config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST | - CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA; + auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST | + PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA; return getInfo(param_value_size, param_value, param_value_size_ret, config); } case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: { @@ -1674,7 +1674,7 @@ pi_result cuda_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags, assert(memObj != nullptr); const auto bufferRegion = - *reinterpret_cast(buffer_create_info); + *reinterpret_cast(buffer_create_info); assert((bufferRegion.size != 0u) && "PI_INVALID_BUFFER_SIZE"); assert((bufferRegion.origin <= (bufferRegion.origin + bufferRegion.size)) && @@ -3596,7 +3596,7 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, /// USM: memadvise API to govern behavior of automatic migration mechanisms pi_result cuda_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, - size_t length, int advice, + size_t length, pi_mem_advice advice, pi_event *event) { assert(queue != nullptr); assert(ptr != nullptr); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 855137f905e3d..69bc21a64f01f 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -939,7 +939,8 @@ pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, /// \param event is the event that represents this operation // USM memadvise API to govern behavior of automatic migration mechanisms pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, - size_t length, int advice, pi_event *event) { + size_t length, pi_mem_advice advice, + pi_event *event) { return cast( clEnqueueMarkerWithWaitList(cast(queue), 0, nullptr, diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 5482dea68d784..c2718d7ae05c5 100644 --- a/sycl/source/detail/memory_manager.cpp +++ 
b/sycl/source/detail/memory_manager.cpp @@ -189,8 +189,7 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext, SizeInBytes *= Range[I]; RT::PiResult Error = PI_SUCCESS; - // TODO replace with pi_buffer_region - cl_buffer_region Region{Offset, SizeInBytes}; + pi_buffer_region_struct Region{Offset, SizeInBytes}; RT::PiMem NewMem; const detail::plugin &Plugin = TargetContext->getPlugin(); Error = Plugin.call_nocheck( From 20ec9f02c7b796aeb314e5cb10a67459f4d3c95d Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 19 May 2020 23:55:07 +0300 Subject: [PATCH 2/2] Fix using statement Signed-off-by: Artur Gainullin --- sycl/include/CL/sycl/detail/pi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 73bf9cd9b6c11..1c3efb5db1550 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -699,7 +699,7 @@ struct pi_buffer_region_struct { size_t origin; size_t size; }; -using pi_buffer_region_struct *pi_buffer_region; +using pi_buffer_region = pi_buffer_region_struct *; // Offload binaries descriptor version supported by this library. static const uint16_t PI_DEVICE_BINARIES_VERSION = 1;