diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp index 302289220f81..3b7f362d51a5 100644 --- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp @@ -493,6 +493,9 @@ DPCTLSyclEventRef dpnp_ptp_c(DPCTLSyclQueueRef q_ref, (void)dep_event_vec_ref; DPCTLSyclEventRef event_ref = nullptr; + DPCTLSyclEventRef e1_ref = nullptr; + DPCTLSyclEventRef e2_ref = nullptr; + DPCTLSyclEventRef e3_ref = nullptr; if ((input1_in == nullptr) || (result1_out == nullptr)) { @@ -514,29 +517,36 @@ DPCTLSyclEventRef dpnp_ptp_c(DPCTLSyclQueueRef q_ref, _DataType* min_arr = reinterpret_cast<_DataType*>(sycl::malloc_shared(result_size * sizeof(_DataType), q)); _DataType* max_arr = reinterpret_cast<_DataType*>(sycl::malloc_shared(result_size * sizeof(_DataType), q)); - dpnp_min_c<_DataType>(arr, min_arr, result_size, input_shape, input_ndim, axis, naxis); - dpnp_max_c<_DataType>(arr, max_arr, result_size, input_shape, input_ndim, axis, naxis); + e1_ref = dpnp_min_c<_DataType>(q_ref, arr, min_arr, result_size, input_shape, input_ndim, axis, naxis, NULL); + e2_ref = dpnp_max_c<_DataType>(q_ref, arr, max_arr, result_size, input_shape, input_ndim, axis, naxis, NULL); shape_elem_type* _strides = reinterpret_cast(sycl::malloc_shared(result_ndim * sizeof(shape_elem_type), q)); get_shape_offsets_inkernel(result_shape, result_ndim, _strides); - dpnp_subtract_c<_DataType, _DataType, _DataType>(result, - result_size, - result_ndim, - result_shape, - result_strides, - max_arr, - result_size, - result_ndim, - result_shape, - _strides, - min_arr, - result_size, - result_ndim, - result_shape, - _strides, - NULL); + e3_ref = dpnp_subtract_c<_DataType, _DataType, _DataType>(q_ref, result, + result_size, + result_ndim, + result_shape, + result_strides, + max_arr, + result_size, + result_ndim, + result_shape, + _strides, + min_arr, + result_size, + result_ndim, + result_shape, + _strides, + NULL, NULL); + + DPCTLEvent_Wait(e1_ref); + DPCTLEvent_Wait(e2_ref); + DPCTLEvent_Wait(e3_ref); + DPCTLEvent_Delete(e1_ref); + DPCTLEvent_Delete(e2_ref); + DPCTLEvent_Delete(e3_ref); sycl::free(min_arr, q); sycl::free(max_arr, q); @@ -576,6 +586,7 @@ void dpnp_ptp_c(void* result1_out, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index 4d0f6498ed0f..b64670be4e09 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -148,16 +148,16 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) \ sycl::queue q = *(reinterpret_cast(q_ref)); \ \ - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size); \ - DPNPC_ptr_adapter input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \ - DPNPC_ptr_adapter input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \ + DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size); \ + DPNPC_ptr_adapter input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \ + DPNPC_ptr_adapter input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \ \ - DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, input2_in, input2_size); \ - DPNPC_ptr_adapter input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \ - DPNPC_ptr_adapter input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \ + DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, input2_in, input2_size); \ + DPNPC_ptr_adapter input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \ + DPNPC_ptr_adapter input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \ \ - DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true); \ - DPNPC_ptr_adapter result_strides_ptr(q_ref, result_strides, result_ndim); \ + DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true); \ + DPNPC_ptr_adapter result_strides_ptr(q_ref, result_strides, result_ndim); \ \ _DataType* input1_data = input1_ptr.get_ptr(); \ shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr(); \ @@ -226,6 +226,14 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) }; \ event = q.submit(kernel_func); \ } \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + input2_ptr.depends_on(event); \ + input2_shape_ptr.depends_on(event); \ + input2_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 98aff7eac758..63b6195e7889 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -143,6 +143,12 @@ } \ } \ \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ + \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ @@ -644,6 +650,12 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap) } \ } \ \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ + \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ @@ -998,6 +1010,17 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) event = q.submit(kernel_func); \ } \ } \ + \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + input2_ptr.depends_on(event); \ + input2_shape_ptr.depends_on(event); \ + input2_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_shape_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ + \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index ef6fffb8fb60..d37e319b7e3b 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -901,7 +901,7 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, DPCTLSyclEventRef event_ref = nullptr; sycl::queue q = *(reinterpret_cast(q_ref)); - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size, true); + DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size); DPNPC_ptr_adapter<_IndecesType> input2_ptr(q_ref, indices1, size); _DataType* array_1 = input1_ptr.get_ptr(); _IndecesType* indices = input2_ptr.get_ptr(); diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp index dc2493094a26..32f8ffe465d2 100644 --- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp @@ -170,6 +170,8 @@ DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); } + input1_ptr.depends_on(event); + result1_ptr.depends_on(event); event_ref = reinterpret_cast(&event); return DPCTLEvent_Copy(event_ref); @@ -483,6 +485,8 @@ DPCTLSyclEventRef dpnp_ediff1d_c(DPCTLSyclQueueRef q_ref, }; event = q.submit(kernel_func); + input1_ptr.depends_on(event); + result_ptr.depends_on(event); event_ref = reinterpret_cast(&event); return DPCTLEvent_Copy(event_ref); @@ -676,6 +680,7 @@ void dpnp_floor_divide_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -770,6 +775,7 @@ void dpnp_modf_c(void* array1_in, void* result1_out, void* result2_out, size_t s size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -911,6 +917,7 @@ void dpnp_remainder_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1041,6 +1048,7 @@ void dpnp_trapz_c( array2_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index afc5df8187d3..53207e67ff3e 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1674,23 +1674,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, // Fast, statically typed path: shuffle the underlying buffer. // Only for non-empty, 1d objects of class ndarray (subclasses such // as MaskedArrays may not support this approach). - char* buf = reinterpret_cast(sycl::malloc_shared(itemsize * sizeof(char), q)); + void* buf = sycl::malloc_device(itemsize, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (i != j) { - auto memcpy1 = - q.submit([&](sycl::handler& h) { h.memcpy(buf, result1 + j * itemsize, itemsize); }); - auto memcpy2 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy1}); - h.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize); - }); - auto memcpy3 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy2}); - h.memcpy(result1 + i * itemsize, buf, itemsize); - }); - memcpy3.wait(); + auto memcpy1 = q.memcpy(buf, result1 + j * itemsize, itemsize); + auto memcpy2 = q.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize, memcpy1); + q.memcpy(result1 + i * itemsize, buf, itemsize, memcpy2).wait(); } } sycl::free(buf, q); @@ -1699,23 +1691,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, { // Multidimensional ndarrays require a bounce buffer. size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element - char* buf = reinterpret_cast(sycl::malloc_shared(step_size * sizeof(char), q)); + void* buf = sycl::malloc_device(step_size, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (j < i) { - auto memcpy1 = - q.submit([&](sycl::handler& h) { h.memcpy(buf, result1 + j * step_size, step_size); }); - auto memcpy2 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy1}); - h.memcpy(result1 + j * step_size, result1 + i * step_size, step_size); - }); - auto memcpy3 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy2}); - h.memcpy(result1 + i * step_size, buf, step_size); - }); - memcpy3.wait(); + auto memcpy1 = q.memcpy(buf, result1 + j * step_size, step_size); + auto memcpy2 = q.memcpy(result1 + j * step_size, result1 + i * step_size, step_size, memcpy1); + q.memcpy(result1 + i * step_size, buf, step_size, memcpy2).wait(); } } sycl::free(buf, q); diff --git a/dpnp/backend/kernels/dpnp_krnl_reduction.cpp b/dpnp/backend/kernels/dpnp_krnl_reduction.cpp index acda35db3a34..e03aabee5aec 100644 --- a/dpnp/backend/kernels/dpnp_krnl_reduction.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_reduction.cpp @@ -162,6 +162,7 @@ void dpnp_sum_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -278,6 +279,7 @@ void dpnp_prod_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp index 84e919954d47..614bb94f0705 100644 --- a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp @@ -91,6 +91,7 @@ void dpnp_argsort_c(void* array1_in, void* result1, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -242,6 +243,7 @@ void dpnp_partition_c( ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -394,6 +396,7 @@ void dpnp_searchsorted_c( v_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -459,6 +462,7 @@ void dpnp_sort_c(void* array1_in, void* result1, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp index 0617529b81ba..abf77ff25eec 100644 --- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp @@ -101,6 +101,7 @@ void dpnp_correlate_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -298,6 +299,7 @@ void dpnp_count_nonzero_c(void* array1_in, void* result1_out, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -539,6 +541,7 @@ void dpnp_max_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -636,6 +639,7 @@ void dpnp_mean_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -721,6 +725,7 @@ void dpnp_median_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -963,6 +968,7 @@ void dpnp_min_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1044,6 +1050,7 @@ void dpnp_nanvar_c(void* array1_in, void* mask_arr1, void* result1, const size_t arr_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1100,17 +1107,20 @@ DPCTLSyclEventRef dpnp_std_c(DPCTLSyclQueueRef q_ref, q)); *var_strides = 1; - dpnp_sqrt_c<_ResultType, _ResultType>(result1, - result1_size, - result1_ndim, - result1_shape, - result1_strides, - var, - var_size, - var_ndim, - var_shape, - var_strides, - NULL); + DPCTLSyclEventRef e_sqrt_ref = + dpnp_sqrt_c<_ResultType, _ResultType>(q_ref, result1, + result1_size, + result1_ndim, + result1_shape, + result1_strides, + var, + var_size, + var_ndim, + var_shape, + var_strides, + NULL, NULL); + DPCTLEvent_WaitAndThrow(e_sqrt_ref); + DPCTLEvent_Delete(e_sqrt_ref); sycl::free(var, q); sycl::free(result1_shape, q); @@ -1142,6 +1152,7 @@ void dpnp_std_c(void* array1_in, ddof, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1253,6 +1264,7 @@ void dpnp_var_c(void* array1_in, ddof, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp index 19b8df3bea7d..3b07795ed5f6 100644 --- a/dpnp/backend/src/dpnpc_memory_adapter.hpp +++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp @@ -52,6 +52,7 @@ class DPNPC_ptr_adapter final bool target_no_queue = false; /**< Indicates that original memory will be accessed from non SYCL environment */ bool copy_back = false; /**< If the memory is 'result' it needs to be copied back to original */ const bool verbose = false; + std::vector deps; public: DPNPC_ptr_adapter() = delete; @@ -68,6 +69,7 @@ class DPNPC_ptr_adapter final copy_back = copy_back_request; orig_ptr = const_cast(src_ptr); size_in_bytes = size * sizeof(_DataType); + deps = std::vector{}; // enum class alloc { host = 0, device = 1, shared = 2, unknown = 3 }; sycl::usm::alloc src_ptr_type = sycl::usm::alloc::unknown; @@ -117,6 +119,8 @@ class DPNPC_ptr_adapter final std::cerr << "DPNPC_ptr_converter::free_memory at=" << aux_ptr << std::endl; } + sycl::event::wait(deps); + if (copy_back) { copy_data_back(); @@ -158,6 +162,17 @@ class DPNPC_ptr_adapter final dpnp_memory_memcpy_c(queue_ref, orig_ptr, aux_ptr, size_in_bytes); } + + void depends_on(const std::vector &new_deps) { + assert(allocated); + deps.insert(std::end(deps), std::begin(new_deps), std::end(new_deps)); + } + + void depends_on(const sycl::event &new_dep) { + assert(allocated); + deps.push_back(new_dep); + } + }; #endif // DPNP_MEMORY_ADAPTER_H diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index f9863951f5b2..bbf3c1c3b535 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -1377,3 +1377,5 @@ tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentil tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_scalar_q tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_tuple_axis tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_uxpected_interpolation +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_all_nan +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_nan diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 5c823c299d54..6a4bd6e6bb87 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1804,3 +1804,5 @@ tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentil tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_scalar_q tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_tuple_axis tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_uxpected_interpolation +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_all_nan +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_nan