From 885f91a9fd1cbd12f7123690f78bb00c02caf6fb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 11 Aug 2022 10:27:02 -0500 Subject: [PATCH 01/10] dpnp_take_c uses SYCL kernel, no need to use no_sycl parameter in adapter The reason this crashed with CPU device and gave incorrect results on Windows was deeper. 1. Adapter call allocated USM-shared buffer and copies data into it 2. Kernel is submitted to work on USM-shared pointer 3. dpnp_take_c returns kernel submission even 4. Adapter class goes out of scope and frees USM allocation without making sure that the kernel that works on it has completed its execution 5. If kernel execution was in progress we got a crash on CPU, or incorrect result on GPU If kernel execution was complete it worked as expected. This change fixes the problem because it removes creation of unprotected USM-shared temporary. --- dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index ef6fffb8fb60..d37e319b7e3b 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -901,7 +901,7 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, DPCTLSyclEventRef event_ref = nullptr; sycl::queue q = *(reinterpret_cast(q_ref)); - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size, true); + DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size); DPNPC_ptr_adapter<_IndecesType> input2_ptr(q_ref, indices1, size); _DataType* array_1 = input1_ptr.get_ptr(); _IndecesType* indices = input2_ptr.get_ptr(); From 7220d872ee78929cad120f45da5f74e5d6feb4cd Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 11 Aug 2022 10:31:12 -0500 Subject: [PATCH 02/10] Change to DPNPC_adapter to set/use events upon which deallocation must depend The deallocation routine simply calls sycl::event::wait on the stored vector. --- dpnp/backend/src/dpnpc_memory_adapter.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp index 19b8df3bea7d..7b04b42a7566 100644 --- a/dpnp/backend/src/dpnpc_memory_adapter.hpp +++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp @@ -52,6 +52,7 @@ class DPNPC_ptr_adapter final bool target_no_queue = false; /**< Indicates that original memory will be accessed from non SYCL environment */ bool copy_back = false; /**< If the memory is 'result' it needs to be copied back to original */ const bool verbose = false; + std::vector deps{}; public: DPNPC_ptr_adapter() = delete; @@ -68,6 +69,7 @@ class DPNPC_ptr_adapter final copy_back = copy_back_request; orig_ptr = const_cast(src_ptr); size_in_bytes = size * sizeof(_DataType); + deps = std::vector{}; // enum class alloc { host = 0, device = 1, shared = 2, unknown = 3 }; sycl::usm::alloc src_ptr_type = sycl::usm::alloc::unknown; @@ -117,6 +119,8 @@ class DPNPC_ptr_adapter final std::cerr << "DPNPC_ptr_converter::free_memory at=" << aux_ptr << std::endl; } + sycl::event::wait(deps); + if (copy_back) { copy_data_back(); @@ -158,6 +162,15 @@ class DPNPC_ptr_adapter final dpnp_memory_memcpy_c(queue_ref, orig_ptr, aux_ptr, size_in_bytes); } + + void depends_on(const std::vector &new_deps) { + deps.insert(std::end(deps), std::begin(new_deps), std::end(new_deps)); + } + + void depends_on(const sycl::event &new_dep) { + deps.push_back(new_dep); + } + }; #endif // DPNP_MEMORY_ADAPTER_H From 37386bb5802c5f3350cfca9059dd93d0ff231a26 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 14 Aug 2022 07:08:37 -0500 Subject: [PATCH 03/10] Used DPNPC_ptr_adapter::depends_on Also applied DPCTLEvent_Delete in legacy interfaces to avoid memory leak. --- .../kernels/dpnp_krnl_arraycreation.cpp | 47 ++++++++++++------- dpnp/backend/kernels/dpnp_krnl_bitwise.cpp | 24 ++++++---- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 23 +++++++++ .../kernels/dpnp_krnl_mathematical.cpp | 8 ++++ dpnp/backend/kernels/dpnp_krnl_reduction.cpp | 2 + dpnp/backend/kernels/dpnp_krnl_sorting.cpp | 4 ++ dpnp/backend/kernels/dpnp_krnl_statistics.cpp | 34 +++++++++----- 7 files changed, 105 insertions(+), 37 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp index 302289220f81..3b7f362d51a5 100644 --- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp @@ -493,6 +493,9 @@ DPCTLSyclEventRef dpnp_ptp_c(DPCTLSyclQueueRef q_ref, (void)dep_event_vec_ref; DPCTLSyclEventRef event_ref = nullptr; + DPCTLSyclEventRef e1_ref = nullptr; + DPCTLSyclEventRef e2_ref = nullptr; + DPCTLSyclEventRef e3_ref = nullptr; if ((input1_in == nullptr) || (result1_out == nullptr)) { @@ -514,29 +517,36 @@ DPCTLSyclEventRef dpnp_ptp_c(DPCTLSyclQueueRef q_ref, _DataType* min_arr = reinterpret_cast<_DataType*>(sycl::malloc_shared(result_size * sizeof(_DataType), q)); _DataType* max_arr = reinterpret_cast<_DataType*>(sycl::malloc_shared(result_size * sizeof(_DataType), q)); - dpnp_min_c<_DataType>(arr, min_arr, result_size, input_shape, input_ndim, axis, naxis); - dpnp_max_c<_DataType>(arr, max_arr, result_size, input_shape, input_ndim, axis, naxis); + e1_ref = dpnp_min_c<_DataType>(q_ref, arr, min_arr, result_size, input_shape, input_ndim, axis, naxis, NULL); + e2_ref = dpnp_max_c<_DataType>(q_ref, arr, max_arr, result_size, input_shape, input_ndim, axis, naxis, NULL); shape_elem_type* _strides = reinterpret_cast(sycl::malloc_shared(result_ndim * sizeof(shape_elem_type), q)); get_shape_offsets_inkernel(result_shape, result_ndim, _strides); - dpnp_subtract_c<_DataType, _DataType, _DataType>(result, - result_size, - result_ndim, - result_shape, - result_strides, - max_arr, - result_size, - result_ndim, - result_shape, - _strides, - min_arr, - result_size, - result_ndim, - result_shape, - _strides, - NULL); + e3_ref = dpnp_subtract_c<_DataType, _DataType, _DataType>(q_ref, result, + result_size, + result_ndim, + result_shape, + result_strides, + max_arr, + result_size, + result_ndim, + result_shape, + _strides, + min_arr, + result_size, + result_ndim, + result_shape, + _strides, + NULL, NULL); + + DPCTLEvent_Wait(e1_ref); + DPCTLEvent_Wait(e2_ref); + DPCTLEvent_Wait(e3_ref); + DPCTLEvent_Delete(e1_ref); + DPCTLEvent_Delete(e2_ref); + DPCTLEvent_Delete(e3_ref); sycl::free(min_arr, q); sycl::free(max_arr, q); @@ -576,6 +586,7 @@ void dpnp_ptp_c(void* result1_out, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index 4d0f6498ed0f..b64670be4e09 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -148,16 +148,16 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) \ sycl::queue q = *(reinterpret_cast(q_ref)); \ \ - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size); \ - DPNPC_ptr_adapter input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \ - DPNPC_ptr_adapter input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \ + DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size); \ + DPNPC_ptr_adapter input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \ + DPNPC_ptr_adapter input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \ \ - DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, input2_in, input2_size); \ - DPNPC_ptr_adapter input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \ - DPNPC_ptr_adapter input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \ + DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, input2_in, input2_size); \ + DPNPC_ptr_adapter input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \ + DPNPC_ptr_adapter input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \ \ - DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true); \ - DPNPC_ptr_adapter result_strides_ptr(q_ref, result_strides, result_ndim); \ + DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true); \ + DPNPC_ptr_adapter result_strides_ptr(q_ref, result_strides, result_ndim); \ \ _DataType* input1_data = input1_ptr.get_ptr(); \ shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr(); \ @@ -226,6 +226,14 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) }; \ event = q.submit(kernel_func); \ } \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + input2_ptr.depends_on(event); \ + input2_shape_ptr.depends_on(event); \ + input2_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 98aff7eac758..63b6195e7889 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -143,6 +143,12 @@ } \ } \ \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ + \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ @@ -644,6 +650,12 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap) } \ } \ \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ + \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ @@ -998,6 +1010,17 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) event = q.submit(kernel_func); \ } \ } \ + \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + input2_ptr.depends_on(event); \ + input2_shape_ptr.depends_on(event); \ + input2_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_shape_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ + \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp index dc2493094a26..32f8ffe465d2 100644 --- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp @@ -170,6 +170,8 @@ DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); } + input1_ptr.depends_on(event); + result1_ptr.depends_on(event); event_ref = reinterpret_cast(&event); return DPCTLEvent_Copy(event_ref); @@ -483,6 +485,8 @@ DPCTLSyclEventRef dpnp_ediff1d_c(DPCTLSyclQueueRef q_ref, }; event = q.submit(kernel_func); + input1_ptr.depends_on(event); + result_ptr.depends_on(event); event_ref = reinterpret_cast(&event); return DPCTLEvent_Copy(event_ref); @@ -676,6 +680,7 @@ void dpnp_floor_divide_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -770,6 +775,7 @@ void dpnp_modf_c(void* array1_in, void* result1_out, void* result2_out, size_t s size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -911,6 +917,7 @@ void dpnp_remainder_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1041,6 +1048,7 @@ void dpnp_trapz_c( array2_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_reduction.cpp b/dpnp/backend/kernels/dpnp_krnl_reduction.cpp index acda35db3a34..e03aabee5aec 100644 --- a/dpnp/backend/kernels/dpnp_krnl_reduction.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_reduction.cpp @@ -162,6 +162,7 @@ void dpnp_sum_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -278,6 +279,7 @@ void dpnp_prod_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp index 84e919954d47..614bb94f0705 100644 --- a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp @@ -91,6 +91,7 @@ void dpnp_argsort_c(void* array1_in, void* result1, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -242,6 +243,7 @@ void dpnp_partition_c( ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -394,6 +396,7 @@ void dpnp_searchsorted_c( v_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -459,6 +462,7 @@ void dpnp_sort_c(void* array1_in, void* result1, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp index 0617529b81ba..abf77ff25eec 100644 --- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp @@ -101,6 +101,7 @@ void dpnp_correlate_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -298,6 +299,7 @@ void dpnp_count_nonzero_c(void* array1_in, void* result1_out, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -539,6 +541,7 @@ void dpnp_max_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -636,6 +639,7 @@ void dpnp_mean_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -721,6 +725,7 @@ void dpnp_median_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -963,6 +968,7 @@ void dpnp_min_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1044,6 +1050,7 @@ void dpnp_nanvar_c(void* array1_in, void* mask_arr1, void* result1, const size_t arr_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1100,17 +1107,20 @@ DPCTLSyclEventRef dpnp_std_c(DPCTLSyclQueueRef q_ref, q)); *var_strides = 1; - dpnp_sqrt_c<_ResultType, _ResultType>(result1, - result1_size, - result1_ndim, - result1_shape, - result1_strides, - var, - var_size, - var_ndim, - var_shape, - var_strides, - NULL); + DPCTLSyclEventRef e_sqrt_ref = + dpnp_sqrt_c<_ResultType, _ResultType>(q_ref, result1, + result1_size, + result1_ndim, + result1_shape, + result1_strides, + var, + var_size, + var_ndim, + var_shape, + var_strides, + NULL, NULL); + DPCTLEvent_WaitAndThrow(e_sqrt_ref); + DPCTLEvent_Delete(e_sqrt_ref); sycl::free(var, q); sycl::free(result1_shape, q); @@ -1142,6 +1152,7 @@ void dpnp_std_c(void* array1_in, ddof, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1253,6 +1264,7 @@ void dpnp_var_c(void* array1_in, ddof, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template From 16a7632b87faba7257d6cb1b2e7bca2df6c4196b Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 17 Aug 2022 13:04:52 +0200 Subject: [PATCH 04/10] Get rid of "Improper Null Termination" issue Add a null-terminated symbol at the end of char array to avoid "Improper Null Termination" issue reported by Checkmarx scan. --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index afc5df8187d3..47d104677471 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1674,14 +1674,17 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, // Fast, statically typed path: shuffle the underlying buffer. // Only for non-empty, 1d objects of class ndarray (subclasses such // as MaskedArrays may not support this approach). - char* buf = reinterpret_cast(sycl::malloc_shared(itemsize * sizeof(char), q)); + char* buf = reinterpret_cast(sycl::malloc_shared((itemsize + 1) * sizeof(char), q)); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (i != j) { - auto memcpy1 = - q.submit([&](sycl::handler& h) { h.memcpy(buf, result1 + j * itemsize, itemsize); }); + auto memcpy1 = q.submit([&](sycl::handler& h) { + h.memcpy(buf, result1 + j * itemsize, itemsize); + // Make as null-terminated buffer to resolve CheckMarx's false positive issue + buf[itemsize] = '\0'; + }); auto memcpy2 = q.submit([&](sycl::handler& h) { h.depends_on({memcpy1}); h.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize); @@ -1699,14 +1702,17 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, { // Multidimensional ndarrays require a bounce buffer. size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element - char* buf = reinterpret_cast(sycl::malloc_shared(step_size * sizeof(char), q)); + char* buf = reinterpret_cast(sycl::malloc_shared((step_size + 1) * sizeof(char), q)); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (j < i) { - auto memcpy1 = - q.submit([&](sycl::handler& h) { h.memcpy(buf, result1 + j * step_size, step_size); }); + auto memcpy1 = q.submit([&](sycl::handler& h) { + h.memcpy(buf, result1 + j * step_size, step_size); + // Make as null-terminated buffer to resolve CheckMarx's false positive issue + buf[step_size] = '\0'; + }); auto memcpy2 = q.submit([&](sycl::handler& h) { h.depends_on({memcpy1}); h.memcpy(result1 + j * step_size, result1 + i * step_size, step_size); From d839ea1f9378229e6564a9ff6cc19017221be725 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 17 Aug 2022 13:32:08 -0500 Subject: [PATCH 05/10] implemented PR feedback --- dpnp/backend/src/dpnpc_memory_adapter.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp index 7b04b42a7566..3b07795ed5f6 100644 --- a/dpnp/backend/src/dpnpc_memory_adapter.hpp +++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp @@ -52,7 +52,7 @@ class DPNPC_ptr_adapter final bool target_no_queue = false; /**< Indicates that original memory will be accessed from non SYCL environment */ bool copy_back = false; /**< If the memory is 'result' it needs to be copied back to original */ const bool verbose = false; - std::vector deps{}; + std::vector deps; public: DPNPC_ptr_adapter() = delete; @@ -164,10 +164,12 @@ class DPNPC_ptr_adapter final } void depends_on(const std::vector &new_deps) { + assert(allocated); deps.insert(std::end(deps), std::begin(new_deps), std::end(new_deps)); } void depends_on(const sycl::event &new_dep) { + assert(allocated); deps.push_back(new_dep); } From 818dc82991642a621d10213ee573dde8a35310d4 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 18 Aug 2022 14:48:14 -0500 Subject: [PATCH 06/10] Reworked solution with a pointer on void --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 38 +++++------------------ 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 47d104677471..5e3f9531b6c9 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1674,26 +1674,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, // Fast, statically typed path: shuffle the underlying buffer. // Only for non-empty, 1d objects of class ndarray (subclasses such // as MaskedArrays may not support this approach). - char* buf = reinterpret_cast(sycl::malloc_shared((itemsize + 1) * sizeof(char), q)); + void* buf = sycl::malloc_device(itemsize, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (i != j) { - auto memcpy1 = q.submit([&](sycl::handler& h) { - h.memcpy(buf, result1 + j * itemsize, itemsize); - // Make as null-terminated buffer to resolve CheckMarx's false positive issue - buf[itemsize] = '\0'; - }); - auto memcpy2 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy1}); - h.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize); - }); - auto memcpy3 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy2}); - h.memcpy(result1 + i * itemsize, buf, itemsize); - }); - memcpy3.wait(); + auto memcpy1 = q.memcpy(buf, result1 + j * itemsize, itemsize); + auto memcpy2 = q.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize, memcpy1); + q.memcpy(result1 + i * itemsize, buf, itemsize, memcpy2).wait(); } } sycl::free(buf, q); @@ -1702,26 +1691,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, { // Multidimensional ndarrays require a bounce buffer. size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element - char* buf = reinterpret_cast(sycl::malloc_shared((step_size + 1) * sizeof(char), q)); + void* buf = sycl::malloc_device(step_size, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (j < i) { - auto memcpy1 = q.submit([&](sycl::handler& h) { - h.memcpy(buf, result1 + j * step_size, step_size); - // Make as null-terminated buffer to resolve CheckMarx's false positive issue - buf[step_size] = '\0'; - }); - auto memcpy2 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy1}); - h.memcpy(result1 + j * step_size, result1 + i * step_size, step_size); - }); - auto memcpy3 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy2}); - h.memcpy(result1 + i * step_size, buf, step_size); - }); - memcpy3.wait(); + auto memcpy1 = q.memcpy(buf, result1 + j * step_size, step_size); + auto memcpy2 = q.memcpy(result1 + j * step_size, result1 + i * step_size, step_size, memcpy1); + q.memcpy(result1 + i * step_size, buf, step_size, memcpy2).wait(); } } sycl::free(buf, q); From b29d95782706d9056b2962de9e104046dc19721a Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 18 Aug 2022 22:06:54 +0200 Subject: [PATCH 07/10] Update dpnp/backend/kernels/dpnp_krnl_random.cpp Co-authored-by: Oleksandr Pavlyk --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 5e3f9531b6c9..7b115351c14d 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1674,7 +1674,7 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, // Fast, statically typed path: shuffle the underlying buffer. // Only for non-empty, 1d objects of class ndarray (subclasses such // as MaskedArrays may not support this approach). - void* buf = sycl::malloc_device(itemsize, q); + void* buf = sycl::malloc_device(itemsize, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); From 629d0e0367bc4851a292021eae7a115c46223b06 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 18 Aug 2022 22:07:07 +0200 Subject: [PATCH 08/10] Update dpnp/backend/kernels/dpnp_krnl_random.cpp Co-authored-by: Oleksandr Pavlyk --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 7b115351c14d..53207e67ff3e 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1691,7 +1691,7 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, { // Multidimensional ndarrays require a bounce buffer. size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element - void* buf = sycl::malloc_device(step_size, q); + void* buf = sycl::malloc_device(step_size, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); From 2fed06a9210c9251fe3e04a424d3f217cf9e0d86 Mon Sep 17 00:00:00 2001 From: Alexander Rybkin Date: Wed, 24 Aug 2022 15:14:31 +0200 Subject: [PATCH 09/10] Skip for two more tests till waiting fix (#1171) * Skip for two more tests till waiting fix tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_all_nan tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_nan Need to skip them because CI does not work due to this. * The same tests skip for gpu --- tests/skipped_tests.tbl | 2 ++ tests/skipped_tests_gpu.tbl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index f9863951f5b2..bbf3c1c3b535 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -1377,3 +1377,5 @@ tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentil tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_scalar_q tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_tuple_axis tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_uxpected_interpolation +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_all_nan +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_nan diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 5c823c299d54..6a4bd6e6bb87 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1804,3 +1804,5 @@ tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentil tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_scalar_q tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_tuple_axis tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_uxpected_interpolation +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_all_nan +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_nan From 9b14f0ca76a9e0c309bb97b4d5caa0870eecd6bb Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 30 Aug 2022 14:35:28 +0200 Subject: [PATCH 10/10] dpnp_take failed on Windows due to memory corruption (#1172) * dpnp_take failed on Windows due to memory corruption * Add more tests * Integer indexes types with different types of input data * Add trailing empty line to .gitignore --- .gitignore | 10 ++++- dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 49 +++++++++++++-------- tests/skipped_tests_gpu.tbl | 13 ------ tests/test_indexing.py | 12 +++-- 4 files changed, 49 insertions(+), 35 deletions(-) diff --git a/.gitignore b/.gitignore index 2ac17b1752b8..ea56758f290b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,15 @@ +# CMake build and local install directory build build_cython + +# Byte-compiled / optimized / DLL files __pycache__/ + +# Code project files +.vscode + *dpnp_backend* dpnp/**/*.cpython*.so dpnp/**/*.pyd -*~ \ No newline at end of file +*~ +core diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index d37e319b7e3b..5cde013b69f8 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -901,10 +901,8 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, DPCTLSyclEventRef event_ref = nullptr; sycl::queue q = *(reinterpret_cast(q_ref)); - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size); - DPNPC_ptr_adapter<_IndecesType> input2_ptr(q_ref, indices1, size); - _DataType* array_1 = input1_ptr.get_ptr(); - _IndecesType* indices = input2_ptr.get_ptr(); + _DataType* array_1 = reinterpret_cast<_DataType*>(array1_in); + _IndecesType* indices = reinterpret_cast<_IndecesType*>(indices1); _DataType* result = reinterpret_cast<_DataType*>(result1); sycl::range<1> gws(size); @@ -920,7 +918,6 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, sycl::event event = q.submit(kernel_func); event_ref = reinterpret_cast(&event); - return DPCTLEvent_Copy(event_ref); } @@ -937,6 +934,7 @@ void dpnp_take_c(void* array1_in, const size_t array1_size, void* indices1, void size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1073,21 +1071,36 @@ void func_map_init_indexing_func(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_PUT_ALONG_AXIS_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_put_along_axis_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_take_default_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_BLN][eft_INT] = {eft_BLN, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_LNG][eft_INT] = {eft_LNG, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_FLT][eft_INT] = {eft_FLT, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_DBL][eft_INT] = {eft_DBL, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_C128][eft_INT] = {eft_C128, + (void*)dpnp_take_default_c, int32_t>}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_BLN][eft_LNG] = {eft_BLN, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_INT][eft_LNG] = {eft_INT, (void*)dpnp_take_default_c}; fmap[DPNPFuncName::DPNP_FN_TAKE][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_take_default_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_take_default_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_take_default_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_C128][eft_C128] = {eft_C128, - (void*)dpnp_take_default_c, int64_t>}; - - fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_take_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_FLT][eft_LNG] = {eft_FLT, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_DBL][eft_LNG] = {eft_DBL, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_C128][eft_LNG] = {eft_C128, + (void*)dpnp_take_default_c, int64_t>}; + + // TODO: add a handling of other indexes types once DPCtl implementation of data copy is ready + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_BLN][eft_INT] = {eft_BLN, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_LNG][eft_INT] = {eft_LNG, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_FLT][eft_INT] = {eft_FLT, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_DBL][eft_INT] = {eft_DBL, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_C128][eft_INT] = {eft_C128, + (void*)dpnp_take_ext_c, int32_t>}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_BLN][eft_LNG] = {eft_BLN, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_INT][eft_LNG] = {eft_INT, (void*)dpnp_take_ext_c}; fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_take_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_take_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_take_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_C128][eft_C128] = {eft_C128, - (void*)dpnp_take_ext_c, int64_t>}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_FLT][eft_LNG] = {eft_FLT, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_DBL][eft_LNG] = {eft_DBL, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_C128][eft_LNG] = {eft_C128, + (void*)dpnp_take_ext_c, int64_t>}; return; } diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 6a4bd6e6bb87..d41fe24c3c70 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -138,19 +138,6 @@ tests/test_indexing.py::test_nonzero[[[0, 1, 2], [3, 0, 5], [6, 7, 0]]] tests/test_indexing.py::test_nonzero[[[0, 1, 0, 3, 0], [5, 0, 7, 0, 9]]] tests/test_indexing.py::test_nonzero[[[[1, 2], [0, 4]], [[0, 2], [0, 1]], [[0, 0], [3, 1]]]] tests/test_indexing.py::test_nonzero[[[[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]]]] -tests/test_indexing.py::test_take[[[0, 1, 2], [3, 4, 5], [6, 7, 8]]-[[0, 0], [0, 0]]] -tests/test_indexing.py::test_take[[[0, 1, 2], [3, 4, 5], [6, 7, 8]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[0, 1, 2], [3, 4, 5], [6, 7, 8]]-[[1, 2], [3, 4]]] -tests/test_indexing.py::test_take[[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]-[[1, 2], [3, 4]]] -tests/test_indexing.py::test_take[[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]-[[1, 2], [3, 4]]] -tests/test_indexing.py::test_take[[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]-[[1, 2], [3, 4]]] -tests/test_indexing.py::test_take[[[[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]]]-[[1, 2], [3, 4]]] -tests/test_indexing.py::test_take[[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]]-[[1, 2], [3, 4]]] tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_arange_no_dtype_int tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_no_axis tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_3_{n_vals=1, shape=(7,)}::test_place diff --git a/tests/test_indexing.py b/tests/test_indexing.py index c07beee0262e..6519576171d0 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -374,6 +374,12 @@ def test_select(): numpy.testing.assert_array_equal(expected, result) +@pytest.mark.parametrize("array_type", + [numpy.bool8, numpy.int32, numpy.int64, numpy.float32, numpy.float64, numpy.complex128], + ids=['bool8', 'int32', 'int64', 'float32', 'float64', 'complex128']) +@pytest.mark.parametrize("indices_type", + [numpy.int32, numpy.int64], + ids=['int32', 'int64']) @pytest.mark.parametrize("indices", [[[0, 0], [0, 0]], [[1, 2], [1, 2]], @@ -395,9 +401,9 @@ def test_select(): '[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]', '[[[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]]]', '[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]]']) -def test_take(array, indices): - a = numpy.array(array) - ind = numpy.array(indices) +def test_take(array, indices, array_type, indices_type): + a = numpy.array(array, dtype=array_type) + ind = numpy.array(indices, dtype=indices_type) ia = dpnp.array(a) iind = dpnp.array(ind) expected = numpy.take(a, ind)