diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h
index 6718261c485b8..abb13f90ddb9f 100644
--- a/sycl/include/CL/sycl/detail/pi.h
+++ b/sycl/include/CL/sycl/detail/pi.h
@@ -508,6 +508,7 @@ constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW = CL_MEM_READ_WRITE;
 // Host pointer
 constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE = CL_MEM_USE_HOST_PTR;
 constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY = CL_MEM_COPY_HOST_PTR;
+constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC = CL_MEM_ALLOC_HOST_PTR;
 
 // NOTE: queue properties are implemented this way to better support bit
 // manipulations
diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp
index d1e0722e153c7..b8040a828c426 100644
--- a/sycl/plugins/cuda/pi_cuda.cpp
+++ b/sycl/plugins/cuda/pi_cuda.cpp
@@ -1511,6 +1511,12 @@ pi_result cuda_piMemBufferCreate(pi_context context, pi_mem_flags flags,
             cuMemHostRegister(host_ptr, size, CU_MEMHOSTREGISTER_DEVICEMAP));
         retErr = PI_CHECK_ERROR(cuMemHostGetDevicePointer(&ptr, host_ptr, 0));
         allocMode = _pi_mem::alloc_mode::use_host_ptr;
+      } else if (flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) {
+        // Allocate the host memory here and fetch the device pointer for it;
+        // cuda_piMemRelease frees this allocation with cuMemFreeHost.
+        retErr = PI_CHECK_ERROR(cuMemAllocHost(&host_ptr, size));
+        retErr = PI_CHECK_ERROR(cuMemHostGetDevicePointer(&ptr, host_ptr, 0));
+        allocMode = _pi_mem::alloc_mode::alloc_host_ptr;
       } else {
         retErr = PI_CHECK_ERROR(cuMemAlloc(&ptr, size));
         if (flags & PI_MEM_FLAGS_HOST_PTR_COPY) {
@@ -1582,6 +1588,9 @@ pi_result cuda_piMemRelease(pi_mem memObj) {
       case _pi_mem::alloc_mode::use_host_ptr:
         ret = PI_CHECK_ERROR(cuMemHostUnregister(uniqueMemObj->hostPtr_));
         break;
+      case _pi_mem::alloc_mode::alloc_host_ptr:
+        ret = PI_CHECK_ERROR(cuMemFreeHost(uniqueMemObj->hostPtr_));
+        break;
       };
     }
 
diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp
index dea8292f03c04..f8b29509e27c4 100644
--- a/sycl/plugins/cuda/pi_cuda.hpp
+++ b/sycl/plugins/cuda/pi_cuda.hpp
@@ -197,8 +197,14 @@ struct _pi_mem {
    * use_host_ptr: Use an address on the host for the device
    * copy_in: The data for the device comes from the host but the host
    pointer is not available later for re-use
+   * alloc_host_ptr: Uses pinned-memory allocation
    */
-  enum class alloc_mode { classic, use_host_ptr, copy_in } allocMode_;
+  enum class alloc_mode {
+    classic,
+    use_host_ptr,
+    copy_in,
+    alloc_host_ptr
+  } allocMode_;
 
   _pi_mem(pi_context ctxt, pi_mem parent, alloc_mode mode, CUdeviceptr ptr,
           void *host_ptr, size_t size)
diff --git a/sycl/unittests/pi/cuda/test_mem_obj.cpp b/sycl/unittests/pi/cuda/test_mem_obj.cpp
index bdf22e4e44cd0..e941f3af8a1ba 100644
--- a/sycl/unittests/pi/cuda/test_mem_obj.cpp
+++ b/sycl/unittests/pi/cuda/test_mem_obj.cpp
@@ -73,6 +73,19 @@ TEST_F(CudaTestMemObj, piMemBufferCreateSimple) {
             PI_SUCCESS);
 }
 
+TEST_F(CudaTestMemObj, piMemBufferAllocHost) {
+  const size_t memSize = 1024u;
+  pi_mem memObj;
+  // No host pointer is passed in; the buffer is created with HOST_PTR_ALLOC.
+  ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
+                context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC,
+                memSize, nullptr, &memObj)),
+            PI_SUCCESS);
+
+  ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
+            PI_SUCCESS);
+}
+
 TEST_F(CudaTestMemObj, piMemBufferCreateNoActiveContext) {
   const size_t memSize = 1024u;
   // Context has been destroyed