From 6fd84ca411403999508dddd199a3a8359eeb4bc6 Mon Sep 17 00:00:00 2001 From: Ruyman Reyes Date: Fri, 22 May 2020 10:20:06 +0000 Subject: [PATCH 1/2] [SYCL][CUDA] Using Custom context by default Performance analysis shows that it is better for the default behaviour of SYCL queues on the CUDA backend to be to use the non-primary (custom) context. The primary context will be exposed for interop with the CUDA Runtime API in a separate extension as a property. Signed-off-by: Ruyman Reyes --- sycl/source/detail/context_impl.cpp | 5 +++-- sycl/source/detail/queue_impl.hpp | 10 +++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 8353396e7792d..6ef28b6a3189c 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -43,8 +43,9 @@ context_impl::context_impl(const vector_class Devices, if (MPlatform->is_cuda()) { #if USE_PI_CUDA - const pi_context_properties props[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, - UseCUDAPrimaryContext, 0}; + const pi_context_properties props[] = { + static_cast(PI_CONTEXT_PROPERTIES_CUDA_PRIMARY), + static_cast(UseCUDAPrimaryContext), 0}; getPlugin().call(props, DeviceIds.size(), DeviceIds.data(), nullptr, nullptr, &MContext); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 367bd95746614..97d6449c69a39 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -33,6 +33,13 @@ using DeviceImplPtr = shared_ptr_class; /// Sets max number of queues supported by FPGA RT. 
const size_t MaxNumQueues = 256; +//// Possible CUDA context types supported by PI CUDA backend +/// TODO: Implement this as a property once there is an extension document +enum class cuda_context_type : char { primary, custom }; + +/// Default context type created for CUDA backend +constexpr cuda_context_type DefaultContextType = cuda_context_type::custom; + enum QueueOrder { Ordered, OOO }; class queue_impl { @@ -50,7 +57,8 @@ class queue_impl { const property_list &PropList) : queue_impl(Device, detail::getSyclObjImpl(context( - createSyclObjFromImpl(Device), {}, true)), + createSyclObjFromImpl(Device), {}, + (DefaultContextType == cuda_context_type::primary))), AsyncHandler, Order, PropList){}; /// Constructs a SYCL queue with an async_handler and property_list provided From af5f78cda2ff487a2a24be264e367d033e05d678 Mon Sep 17 00:00:00 2001 From: Ruyman Reyes Date: Fri, 22 May 2020 12:24:45 +0000 Subject: [PATCH 2/2] [SYCL][CUDA] Fixed missing ScopedContext on Map/Unmap Signed-off-by: Ruyman Reyes --- sycl/plugins/cuda/pi_cuda.cpp | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 026ccd4daf490..f8f0ed3e3e13d 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -3340,6 +3340,7 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_event *retEvent, void **ret_map) { assert(ret_map != nullptr); + assert(command_queue != nullptr); pi_result ret_err = PI_INVALID_OPERATION; @@ -3361,9 +3362,15 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, num_events_in_wait_list, event_wait_list, retEvent); } else { if (retEvent) { - *retEvent = - _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP, command_queue); - (*retEvent)->record(); + try { + ScopedContext active(command_queue->get_context()); + + *retEvent = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP, + 
command_queue); + (*retEvent)->record(); + } catch (pi_result error) { + ret_err = error; + } } } @@ -3380,6 +3387,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, pi_event *retEvent) { pi_result ret_err = PI_SUCCESS; + assert(command_queue != nullptr); assert(mapped_ptr != nullptr); assert(memobj != nullptr); assert(memobj->get_map_ptr() != nullptr); @@ -3393,9 +3401,15 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, retEvent); } else { if (retEvent) { - *retEvent = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP, - command_queue); - (*retEvent)->record(); + try { + ScopedContext active(command_queue->get_context()); + + *retEvent = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP, + command_queue); + (*retEvent)->record(); + } catch (pi_result error) { + ret_err = error; + } } }