From 885f91a9fd1cbd12f7123690f78bb00c02caf6fb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 11 Aug 2022 10:27:02 -0500 Subject: [PATCH 01/30] dpnp_take_c uses SYCL kernel, no need to use no_sycl parameter in adapter The reason this crashed with CPU device and gave incorrect results on Windows was deeper. 1. Adapter call allocates a USM-shared buffer and copies data into it 2. Kernel is submitted to work on the USM-shared pointer 3. dpnp_take_c returns the kernel submission event 4. Adapter class goes out of scope and frees the USM allocation without making sure that the kernel that works on it has completed its execution 5. If kernel execution was in progress we got a crash on CPU, or an incorrect result on GPU. If kernel execution was complete it worked as expected. This change fixes the problem because it removes the creation of the unprotected USM-shared temporary. --- dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index ef6fffb8fb60..d37e319b7e3b 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -901,7 +901,7 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, DPCTLSyclEventRef event_ref = nullptr; sycl::queue q = *(reinterpret_cast(q_ref)); - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size, true); + DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size); DPNPC_ptr_adapter<_IndecesType> input2_ptr(q_ref, indices1, size); _DataType* array_1 = input1_ptr.get_ptr(); _IndecesType* indices = input2_ptr.get_ptr(); From 7220d872ee78929cad120f45da5f74e5d6feb4cd Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 11 Aug 2022 10:31:12 -0500 Subject: [PATCH 02/30] Change DPNPC_ptr_adapter to set/use events upon which deallocation must depend The deallocation routine simply calls sycl::event::wait on the stored vector.
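A minimal usage sketch of the new mechanism (illustrative only: q, q_ref, array_in, size and kernel_func below are placeholder names, not code taken verbatim from any kernel):

    DPNPC_ptr_adapter<_DataType> input_ptr(q_ref, array_in, size);
    _DataType* data = input_ptr.get_ptr();   // may point at a USM-shared temporary

    sycl::event ev = q.submit(kernel_func);  // submitted kernel reads `data`
    input_ptr.depends_on(ev);                // record the event on the adapter

    // When input_ptr goes out of scope, free_memory() first calls
    // sycl::event::wait(deps), so the optional copy-back and the release of
    // the temporary allocation happen only after the recorded work has completed.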
--- dpnp/backend/src/dpnpc_memory_adapter.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp index 19b8df3bea7d..7b04b42a7566 100644 --- a/dpnp/backend/src/dpnpc_memory_adapter.hpp +++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp @@ -52,6 +52,7 @@ class DPNPC_ptr_adapter final bool target_no_queue = false; /**< Indicates that original memory will be accessed from non SYCL environment */ bool copy_back = false; /**< If the memory is 'result' it needs to be copied back to original */ const bool verbose = false; + std::vector deps{}; public: DPNPC_ptr_adapter() = delete; @@ -68,6 +69,7 @@ class DPNPC_ptr_adapter final copy_back = copy_back_request; orig_ptr = const_cast(src_ptr); size_in_bytes = size * sizeof(_DataType); + deps = std::vector{}; // enum class alloc { host = 0, device = 1, shared = 2, unknown = 3 }; sycl::usm::alloc src_ptr_type = sycl::usm::alloc::unknown; @@ -117,6 +119,8 @@ class DPNPC_ptr_adapter final std::cerr << "DPNPC_ptr_converter::free_memory at=" << aux_ptr << std::endl; } + sycl::event::wait(deps); + if (copy_back) { copy_data_back(); @@ -158,6 +162,15 @@ class DPNPC_ptr_adapter final dpnp_memory_memcpy_c(queue_ref, orig_ptr, aux_ptr, size_in_bytes); } + + void depends_on(const std::vector &new_deps) { + deps.insert(std::end(deps), std::begin(new_deps), std::end(new_deps)); + } + + void depends_on(const sycl::event &new_dep) { + deps.push_back(new_dep); + } + }; #endif // DPNP_MEMORY_ADAPTER_H From 37386bb5802c5f3350cfca9059dd93d0ff231a26 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 14 Aug 2022 07:08:37 -0500 Subject: [PATCH 03/30] Used DPNPC_ptr_adapter::depends_on Also applied DPCTLEvent_Delete in legacy interfaces to avoid memory leak. 
--- .../kernels/dpnp_krnl_arraycreation.cpp | 47 ++++++++++++------- dpnp/backend/kernels/dpnp_krnl_bitwise.cpp | 24 ++++++---- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 23 +++++++++ .../kernels/dpnp_krnl_mathematical.cpp | 8 ++++ dpnp/backend/kernels/dpnp_krnl_reduction.cpp | 2 + dpnp/backend/kernels/dpnp_krnl_sorting.cpp | 4 ++ dpnp/backend/kernels/dpnp_krnl_statistics.cpp | 34 +++++++++----- 7 files changed, 105 insertions(+), 37 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp index 302289220f81..3b7f362d51a5 100644 --- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp @@ -493,6 +493,9 @@ DPCTLSyclEventRef dpnp_ptp_c(DPCTLSyclQueueRef q_ref, (void)dep_event_vec_ref; DPCTLSyclEventRef event_ref = nullptr; + DPCTLSyclEventRef e1_ref = nullptr; + DPCTLSyclEventRef e2_ref = nullptr; + DPCTLSyclEventRef e3_ref = nullptr; if ((input1_in == nullptr) || (result1_out == nullptr)) { @@ -514,29 +517,36 @@ DPCTLSyclEventRef dpnp_ptp_c(DPCTLSyclQueueRef q_ref, _DataType* min_arr = reinterpret_cast<_DataType*>(sycl::malloc_shared(result_size * sizeof(_DataType), q)); _DataType* max_arr = reinterpret_cast<_DataType*>(sycl::malloc_shared(result_size * sizeof(_DataType), q)); - dpnp_min_c<_DataType>(arr, min_arr, result_size, input_shape, input_ndim, axis, naxis); - dpnp_max_c<_DataType>(arr, max_arr, result_size, input_shape, input_ndim, axis, naxis); + e1_ref = dpnp_min_c<_DataType>(q_ref, arr, min_arr, result_size, input_shape, input_ndim, axis, naxis, NULL); + e2_ref = dpnp_max_c<_DataType>(q_ref, arr, max_arr, result_size, input_shape, input_ndim, axis, naxis, NULL); shape_elem_type* _strides = reinterpret_cast(sycl::malloc_shared(result_ndim * sizeof(shape_elem_type), q)); get_shape_offsets_inkernel(result_shape, result_ndim, _strides); - dpnp_subtract_c<_DataType, _DataType, _DataType>(result, - result_size, - result_ndim, - result_shape, - result_strides, - max_arr, - result_size, - result_ndim, - result_shape, - _strides, - min_arr, - result_size, - result_ndim, - result_shape, - _strides, - NULL); + e3_ref = dpnp_subtract_c<_DataType, _DataType, _DataType>(q_ref, result, + result_size, + result_ndim, + result_shape, + result_strides, + max_arr, + result_size, + result_ndim, + result_shape, + _strides, + min_arr, + result_size, + result_ndim, + result_shape, + _strides, + NULL, NULL); + + DPCTLEvent_Wait(e1_ref); + DPCTLEvent_Wait(e2_ref); + DPCTLEvent_Wait(e3_ref); + DPCTLEvent_Delete(e1_ref); + DPCTLEvent_Delete(e2_ref); + DPCTLEvent_Delete(e3_ref); sycl::free(min_arr, q); sycl::free(max_arr, q); @@ -576,6 +586,7 @@ void dpnp_ptp_c(void* result1_out, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index 4d0f6498ed0f..b64670be4e09 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -148,16 +148,16 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) \ sycl::queue q = *(reinterpret_cast(q_ref)); \ \ - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size); \ - DPNPC_ptr_adapter input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \ - DPNPC_ptr_adapter input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \ + DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size); \ + DPNPC_ptr_adapter 
input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \ + DPNPC_ptr_adapter input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \ \ - DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, input2_in, input2_size); \ - DPNPC_ptr_adapter input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \ - DPNPC_ptr_adapter input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \ + DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, input2_in, input2_size); \ + DPNPC_ptr_adapter input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \ + DPNPC_ptr_adapter input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \ \ - DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true); \ - DPNPC_ptr_adapter result_strides_ptr(q_ref, result_strides, result_ndim); \ + DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true); \ + DPNPC_ptr_adapter result_strides_ptr(q_ref, result_strides, result_ndim); \ \ _DataType* input1_data = input1_ptr.get_ptr(); \ shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr(); \ @@ -226,6 +226,14 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) }; \ event = q.submit(kernel_func); \ } \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + input2_ptr.depends_on(event); \ + input2_shape_ptr.depends_on(event); \ + input2_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 98aff7eac758..63b6195e7889 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -143,6 +143,12 @@ } \ } \ \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ + \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ @@ -644,6 +650,12 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap) } \ } \ \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ + \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ @@ -998,6 +1010,17 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) event = q.submit(kernel_func); \ } \ } \ + \ + input1_ptr.depends_on(event); \ + input1_shape_ptr.depends_on(event); \ + input1_strides_ptr.depends_on(event); \ + input2_ptr.depends_on(event); \ + input2_shape_ptr.depends_on(event); \ + input2_strides_ptr.depends_on(event); \ + result_ptr.depends_on(event); \ + result_shape_ptr.depends_on(event); \ + result_strides_ptr.depends_on(event); \ + \ event_ref = reinterpret_cast(&event); \ \ return DPCTLEvent_Copy(event_ref); \ diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp index dc2493094a26..32f8ffe465d2 100644 --- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp @@ -170,6 +170,8 @@ DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); } + input1_ptr.depends_on(event); + result1_ptr.depends_on(event); event_ref = 
reinterpret_cast(&event); return DPCTLEvent_Copy(event_ref); @@ -483,6 +485,8 @@ DPCTLSyclEventRef dpnp_ediff1d_c(DPCTLSyclQueueRef q_ref, }; event = q.submit(kernel_func); + input1_ptr.depends_on(event); + result_ptr.depends_on(event); event_ref = reinterpret_cast(&event); return DPCTLEvent_Copy(event_ref); @@ -676,6 +680,7 @@ void dpnp_floor_divide_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -770,6 +775,7 @@ void dpnp_modf_c(void* array1_in, void* result1_out, void* result2_out, size_t s size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -911,6 +917,7 @@ void dpnp_remainder_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1041,6 +1048,7 @@ void dpnp_trapz_c( array2_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_reduction.cpp b/dpnp/backend/kernels/dpnp_krnl_reduction.cpp index acda35db3a34..e03aabee5aec 100644 --- a/dpnp/backend/kernels/dpnp_krnl_reduction.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_reduction.cpp @@ -162,6 +162,7 @@ void dpnp_sum_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -278,6 +279,7 @@ void dpnp_prod_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp index 84e919954d47..614bb94f0705 100644 --- a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp @@ -91,6 +91,7 @@ void dpnp_argsort_c(void* array1_in, void* result1, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -242,6 +243,7 @@ void dpnp_partition_c( ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -394,6 +396,7 @@ void dpnp_searchsorted_c( v_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -459,6 +462,7 @@ void dpnp_sort_c(void* array1_in, void* result1, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp index 0617529b81ba..abf77ff25eec 100644 --- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp @@ -101,6 +101,7 @@ void dpnp_correlate_c(void* result_out, where, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -298,6 +299,7 @@ void dpnp_count_nonzero_c(void* array1_in, void* result1_out, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -539,6 +541,7 @@ void dpnp_max_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -636,6 +639,7 @@ void dpnp_mean_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -721,6 +725,7 @@ void dpnp_median_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + 
DPCTLEvent_Delete(event_ref); } template @@ -963,6 +968,7 @@ void dpnp_min_c(void* array1_in, naxis, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1044,6 +1050,7 @@ void dpnp_nanvar_c(void* array1_in, void* mask_arr1, void* result1, const size_t arr_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1100,17 +1107,20 @@ DPCTLSyclEventRef dpnp_std_c(DPCTLSyclQueueRef q_ref, q)); *var_strides = 1; - dpnp_sqrt_c<_ResultType, _ResultType>(result1, - result1_size, - result1_ndim, - result1_shape, - result1_strides, - var, - var_size, - var_ndim, - var_shape, - var_strides, - NULL); + DPCTLSyclEventRef e_sqrt_ref = + dpnp_sqrt_c<_ResultType, _ResultType>(q_ref, result1, + result1_size, + result1_ndim, + result1_shape, + result1_strides, + var, + var_size, + var_ndim, + var_shape, + var_strides, + NULL, NULL); + DPCTLEvent_WaitAndThrow(e_sqrt_ref); + DPCTLEvent_Delete(e_sqrt_ref); sycl::free(var, q); sycl::free(result1_shape, q); @@ -1142,6 +1152,7 @@ void dpnp_std_c(void* array1_in, ddof, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1253,6 +1264,7 @@ void dpnp_var_c(void* array1_in, ddof, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template From 16a7632b87faba7257d6cb1b2e7bca2df6c4196b Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 17 Aug 2022 13:04:52 +0200 Subject: [PATCH 04/30] Get rid of "Improper Null Termination" issue Add a null-terminated symbol at the end of char array to avoid "Improper Null Termination" issue reported by Checkmarx scan. --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index afc5df8187d3..47d104677471 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1674,14 +1674,17 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, // Fast, statically typed path: shuffle the underlying buffer. // Only for non-empty, 1d objects of class ndarray (subclasses such // as MaskedArrays may not support this approach). - char* buf = reinterpret_cast(sycl::malloc_shared(itemsize * sizeof(char), q)); + char* buf = reinterpret_cast(sycl::malloc_shared((itemsize + 1) * sizeof(char), q)); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (i != j) { - auto memcpy1 = - q.submit([&](sycl::handler& h) { h.memcpy(buf, result1 + j * itemsize, itemsize); }); + auto memcpy1 = q.submit([&](sycl::handler& h) { + h.memcpy(buf, result1 + j * itemsize, itemsize); + // Make as null-terminated buffer to resolve CheckMarx's false positive issue + buf[itemsize] = '\0'; + }); auto memcpy2 = q.submit([&](sycl::handler& h) { h.depends_on({memcpy1}); h.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize); @@ -1699,14 +1702,17 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, { // Multidimensional ndarrays require a bounce buffer. 
size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element - char* buf = reinterpret_cast(sycl::malloc_shared(step_size * sizeof(char), q)); + char* buf = reinterpret_cast(sycl::malloc_shared((step_size + 1) * sizeof(char), q)); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (j < i) { - auto memcpy1 = - q.submit([&](sycl::handler& h) { h.memcpy(buf, result1 + j * step_size, step_size); }); + auto memcpy1 = q.submit([&](sycl::handler& h) { + h.memcpy(buf, result1 + j * step_size, step_size); + // Make as null-terminated buffer to resolve CheckMarx's false positive issue + buf[step_size] = '\0'; + }); auto memcpy2 = q.submit([&](sycl::handler& h) { h.depends_on({memcpy1}); h.memcpy(result1 + j * step_size, result1 + i * step_size, step_size); From d839ea1f9378229e6564a9ff6cc19017221be725 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 17 Aug 2022 13:32:08 -0500 Subject: [PATCH 05/30] implemented PR feedback --- dpnp/backend/src/dpnpc_memory_adapter.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp index 7b04b42a7566..3b07795ed5f6 100644 --- a/dpnp/backend/src/dpnpc_memory_adapter.hpp +++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp @@ -52,7 +52,7 @@ class DPNPC_ptr_adapter final bool target_no_queue = false; /**< Indicates that original memory will be accessed from non SYCL environment */ bool copy_back = false; /**< If the memory is 'result' it needs to be copied back to original */ const bool verbose = false; - std::vector deps{}; + std::vector deps; public: DPNPC_ptr_adapter() = delete; @@ -164,10 +164,12 @@ class DPNPC_ptr_adapter final } void depends_on(const std::vector &new_deps) { + assert(allocated); deps.insert(std::end(deps), std::begin(new_deps), std::end(new_deps)); } void depends_on(const sycl::event &new_dep) { + assert(allocated); deps.push_back(new_dep); } From 818dc82991642a621d10213ee573dde8a35310d4 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 18 Aug 2022 14:48:14 -0500 Subject: [PATCH 06/30] Reworked solution with a pointer on void --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 38 +++++------------------ 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 47d104677471..5e3f9531b6c9 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1674,26 +1674,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, // Fast, statically typed path: shuffle the underlying buffer. // Only for non-empty, 1d objects of class ndarray (subclasses such // as MaskedArrays may not support this approach). 
- char* buf = reinterpret_cast(sycl::malloc_shared((itemsize + 1) * sizeof(char), q)); + void* buf = sycl::malloc_device(itemsize, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (i != j) { - auto memcpy1 = q.submit([&](sycl::handler& h) { - h.memcpy(buf, result1 + j * itemsize, itemsize); - // Make as null-terminated buffer to resolve CheckMarx's false positive issue - buf[itemsize] = '\0'; - }); - auto memcpy2 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy1}); - h.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize); - }); - auto memcpy3 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy2}); - h.memcpy(result1 + i * itemsize, buf, itemsize); - }); - memcpy3.wait(); + auto memcpy1 = q.memcpy(buf, result1 + j * itemsize, itemsize); + auto memcpy2 = q.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize, memcpy1); + q.memcpy(result1 + i * itemsize, buf, itemsize, memcpy2).wait(); } } sycl::free(buf, q); @@ -1702,26 +1691,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, { // Multidimensional ndarrays require a bounce buffer. size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element - char* buf = reinterpret_cast(sycl::malloc_shared((step_size + 1) * sizeof(char), q)); + void* buf = sycl::malloc_device(step_size, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); if (j < i) { - auto memcpy1 = q.submit([&](sycl::handler& h) { - h.memcpy(buf, result1 + j * step_size, step_size); - // Make as null-terminated buffer to resolve CheckMarx's false positive issue - buf[step_size] = '\0'; - }); - auto memcpy2 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy1}); - h.memcpy(result1 + j * step_size, result1 + i * step_size, step_size); - }); - auto memcpy3 = q.submit([&](sycl::handler& h) { - h.depends_on({memcpy2}); - h.memcpy(result1 + i * step_size, buf, step_size); - }); - memcpy3.wait(); + auto memcpy1 = q.memcpy(buf, result1 + j * step_size, step_size); + auto memcpy2 = q.memcpy(result1 + j * step_size, result1 + i * step_size, step_size, memcpy1); + q.memcpy(result1 + i * step_size, buf, step_size, memcpy2).wait(); } } sycl::free(buf, q); From b29d95782706d9056b2962de9e104046dc19721a Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 18 Aug 2022 22:06:54 +0200 Subject: [PATCH 07/30] Update dpnp/backend/kernels/dpnp_krnl_random.cpp Co-authored-by: Oleksandr Pavlyk --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 5e3f9531b6c9..7b115351c14d 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1674,7 +1674,7 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, // Fast, statically typed path: shuffle the underlying buffer. // Only for non-empty, 1d objects of class ndarray (subclasses such // as MaskedArrays may not support this approach). 
- void* buf = sycl::malloc_device(itemsize, q); + void* buf = sycl::malloc_device(itemsize, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); From 629d0e0367bc4851a292021eae7a115c46223b06 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 18 Aug 2022 22:07:07 +0200 Subject: [PATCH 08/30] Update dpnp/backend/kernels/dpnp_krnl_random.cpp Co-authored-by: Oleksandr Pavlyk --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 7b115351c14d..53207e67ff3e 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1691,7 +1691,7 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref, { // Multidimensional ndarrays require a bounce buffer. size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element - void* buf = sycl::malloc_device(step_size, q); + void* buf = sycl::malloc_device(step_size, q); for (size_t i = uvec_size; i > 0; i--) { size_t j = (size_t)(floor((i + 1) * Uvec[i - 1])); From 2fed06a9210c9251fe3e04a424d3f217cf9e0d86 Mon Sep 17 00:00:00 2001 From: Alexander Rybkin Date: Wed, 24 Aug 2022 15:14:31 +0200 Subject: [PATCH 09/30] Skip for two more tests till waiting fix (#1171) * Skip for two more tests till waiting fix tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_all_nan tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_nan Need to skip them because CI does not work due to this. * The same tests skip for gpu --- tests/skipped_tests.tbl | 2 ++ tests/skipped_tests_gpu.tbl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index f9863951f5b2..bbf3c1c3b535 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -1377,3 +1377,5 @@ tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentil tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_scalar_q tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_tuple_axis tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_uxpected_interpolation +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_all_nan +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_nan diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 5c823c299d54..6a4bd6e6bb87 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1804,3 +1804,5 @@ tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentil tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_scalar_q tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_tuple_axis tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_uxpected_interpolation +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_all_nan +tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_ptp_nan From 9b14f0ca76a9e0c309bb97b4d5caa0870eecd6bb Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 30 Aug 2022 14:35:28 +0200 Subject: [PATCH 10/30] dpnp_take failed on Windows due to memory corruption (#1172) * dpnp_take failed on Windows due 
to memory corruption * Add more tests * Integer indexes types with different types of input data * Add trailing empty line to .gitignore --- .gitignore | 10 ++++- dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 49 +++++++++++++-------- tests/skipped_tests_gpu.tbl | 13 ------ tests/test_indexing.py | 12 +++-- 4 files changed, 49 insertions(+), 35 deletions(-) diff --git a/.gitignore b/.gitignore index 2ac17b1752b8..ea56758f290b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,15 @@ +# CMake build and local install directory build build_cython + +# Byte-compiled / optimized / DLL files __pycache__/ + +# Code project files +.vscode + *dpnp_backend* dpnp/**/*.cpython*.so dpnp/**/*.pyd -*~ \ No newline at end of file +*~ +core diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index d37e319b7e3b..5cde013b69f8 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -901,10 +901,8 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, DPCTLSyclEventRef event_ref = nullptr; sycl::queue q = *(reinterpret_cast(q_ref)); - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size); - DPNPC_ptr_adapter<_IndecesType> input2_ptr(q_ref, indices1, size); - _DataType* array_1 = input1_ptr.get_ptr(); - _IndecesType* indices = input2_ptr.get_ptr(); + _DataType* array_1 = reinterpret_cast<_DataType*>(array1_in); + _IndecesType* indices = reinterpret_cast<_IndecesType*>(indices1); _DataType* result = reinterpret_cast<_DataType*>(result1); sycl::range<1> gws(size); @@ -920,7 +918,6 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, sycl::event event = q.submit(kernel_func); event_ref = reinterpret_cast(&event); - return DPCTLEvent_Copy(event_ref); } @@ -937,6 +934,7 @@ void dpnp_take_c(void* array1_in, const size_t array1_size, void* indices1, void size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1073,21 +1071,36 @@ void func_map_init_indexing_func(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_PUT_ALONG_AXIS_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_put_along_axis_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_take_default_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_BLN][eft_INT] = {eft_BLN, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_LNG][eft_INT] = {eft_LNG, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_FLT][eft_INT] = {eft_FLT, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_DBL][eft_INT] = {eft_DBL, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_C128][eft_INT] = {eft_C128, + (void*)dpnp_take_default_c, int32_t>}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_BLN][eft_LNG] = {eft_BLN, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_INT][eft_LNG] = {eft_INT, (void*)dpnp_take_default_c}; fmap[DPNPFuncName::DPNP_FN_TAKE][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_take_default_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_take_default_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_take_default_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE][eft_C128][eft_C128] = {eft_C128, - (void*)dpnp_take_default_c, int64_t>}; - - 
fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_take_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_FLT][eft_LNG] = {eft_FLT, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_DBL][eft_LNG] = {eft_DBL, (void*)dpnp_take_default_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE][eft_C128][eft_LNG] = {eft_C128, + (void*)dpnp_take_default_c, int64_t>}; + + // TODO: add a handling of other indexes types once DPCtl implementation of data copy is ready + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_BLN][eft_INT] = {eft_BLN, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_LNG][eft_INT] = {eft_LNG, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_FLT][eft_INT] = {eft_FLT, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_DBL][eft_INT] = {eft_DBL, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_C128][eft_INT] = {eft_C128, + (void*)dpnp_take_ext_c, int32_t>}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_BLN][eft_LNG] = {eft_BLN, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_INT][eft_LNG] = {eft_INT, (void*)dpnp_take_ext_c}; fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_take_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_take_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_take_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_C128][eft_C128] = {eft_C128, - (void*)dpnp_take_ext_c, int64_t>}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_FLT][eft_LNG] = {eft_FLT, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_DBL][eft_LNG] = {eft_DBL, (void*)dpnp_take_ext_c}; + fmap[DPNPFuncName::DPNP_FN_TAKE_EXT][eft_C128][eft_LNG] = {eft_C128, + (void*)dpnp_take_ext_c, int64_t>}; return; } diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 6a4bd6e6bb87..d41fe24c3c70 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -138,19 +138,6 @@ tests/test_indexing.py::test_nonzero[[[0, 1, 2], [3, 0, 5], [6, 7, 0]]] tests/test_indexing.py::test_nonzero[[[0, 1, 0, 3, 0], [5, 0, 7, 0, 9]]] tests/test_indexing.py::test_nonzero[[[[1, 2], [0, 4]], [[0, 2], [0, 1]], [[0, 0], [3, 1]]]] tests/test_indexing.py::test_nonzero[[[[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]]]] -tests/test_indexing.py::test_take[[[0, 1, 2], [3, 4, 5], [6, 7, 8]]-[[0, 0], [0, 0]]] -tests/test_indexing.py::test_take[[[0, 1, 2], [3, 4, 5], [6, 7, 8]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[0, 1, 2], [3, 4, 5], [6, 7, 8]]-[[1, 2], [3, 4]]] -tests/test_indexing.py::test_take[[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]-[[1, 2], [3, 4]]] -tests/test_indexing.py::test_take[[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]-[[1, 2], [3, 4]]] -tests/test_indexing.py::test_take[[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]-[[1, 2], [3, 4]]] 
-tests/test_indexing.py::test_take[[[[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]]]-[[1, 2], [3, 4]]] -tests/test_indexing.py::test_take[[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]]-[[1, 2], [1, 2]]] -tests/test_indexing.py::test_take[[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]]-[[1, 2], [3, 4]]] tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_arange_no_dtype_int tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_no_axis tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_3_{n_vals=1, shape=(7,)}::test_place diff --git a/tests/test_indexing.py b/tests/test_indexing.py index c07beee0262e..6519576171d0 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -374,6 +374,12 @@ def test_select(): numpy.testing.assert_array_equal(expected, result) +@pytest.mark.parametrize("array_type", + [numpy.bool8, numpy.int32, numpy.int64, numpy.float32, numpy.float64, numpy.complex128], + ids=['bool8', 'int32', 'int64', 'float32', 'float64', 'complex128']) +@pytest.mark.parametrize("indices_type", + [numpy.int32, numpy.int64], + ids=['int32', 'int64']) @pytest.mark.parametrize("indices", [[[0, 0], [0, 0]], [[1, 2], [1, 2]], @@ -395,9 +401,9 @@ def test_select(): '[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]', '[[[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]]]', '[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]]']) -def test_take(array, indices): - a = numpy.array(array) - ind = numpy.array(indices) +def test_take(array, indices, array_type, indices_type): + a = numpy.array(array, dtype=array_type) + ind = numpy.array(indices, dtype=indices_type) ia = dpnp.array(a) iind = dpnp.array(ind) expected = numpy.take(a, ind) From c91f91282c6b0e0528445fd5b0cc4ce8e16946d3 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 8 Sep 2022 07:15:32 -0500 Subject: [PATCH 11/30] Add workflow for Win Fix typo Relax a strict pinning for numpy & cmake Update run command for conda build on Win Fix declaring DPLROOT env Fix DPLROOT source Fix DPLROOT for Win Add missing double quotes Try conda-incubator for Linux Setup conda-incubator for Linux Update caching Exclude python 3.8 Strickly pin on 3.8.13 Change channel order Fix artifcat uploading Replace to single quotes Add missing backslash Corect backslash --- .github/workflows/conda-package.yml | 178 ++++++++++++++++++++++++---- conda-recipe/bld.bat | 1 + conda-recipe/meta.yaml | 8 +- scripts/build_conda_package.sh | 2 +- utils/dpnp_build_utils.py | 2 +- 5 files changed, 164 insertions(+), 27 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index e401cf78c837..00e05f687ea3 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -1,20 +1,35 @@ name: Conda package -on: push +on: + push: + branches: + - master + pull_request: env: - PACKAGE_NAME: dpctl + PACKAGE_NAME: dpnp + MODULE_NAME: dpnp + VER_SCRIPT1: "import json; f = open('ver.json', 'r'); j = json.load(f); f.close(); " + VER_SCRIPT2: "d = j['dpnp'][0]; 
print('='.join((d[s] for s in ('version', 'build'))))" jobs: - build: - runs-on: ubuntu-20.04 + build_linux: + runs-on: ubuntu-latest + + defaults: + run: + shell: bash -l {0} strategy: matrix: - python: [3.8, 3.9] + python: ['3.8', '3.9'] + + env: + conda-pkgs: '/home/runner/conda_pkgs_dir/' + conda-bld: '/usr/share/miniconda3/envs/build/conda-bld/linux-64/' steps: - - name: Checkout repo + - name: Checkout DPNP repo uses: actions/checkout@v3 with: fetch-depth: 0 @@ -24,44 +39,165 @@ jobs: with: repository: oneapi-src/oneDPL path: oneDPL - ref: oneDPL-2021.6.1-release + ref: oneDPL-2021.7.0-release + + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python }} + miniconda-version: 'latest' + activate-environment: 'build' + use-only-tar-bz2: true + + - name: Cache conda packages + uses: actions/cache@v3 + env: + CACHE_NUMBER: 1 # Increase to reset cache + with: + path: ${{ env.conda-pkgs }} + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('**/meta.yaml') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - - name: Add conda to system path - run: echo $CONDA/bin >> $GITHUB_PATH - name: Install conda-build run: conda install conda-build + - name: Build conda package run: ./scripts/build_conda_package.sh ${{ matrix.python }} $GITHUB_WORKSPACE/oneDPL - name: Upload artifact uses: actions/upload-artifact@v2 with: - name: dpnp ${{ runner.os }} ${{ matrix.python }} - path: /usr/share/miniconda/conda-bld/linux-64/dpnp-*.tar.bz2 + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + path: ${{ env.conda-bld }}${{ env.PACKAGE_NAME }}-*.tar.bz2 + + build_windows: + runs-on: windows-latest + + defaults: + run: + shell: cmd /C CALL {0} + + strategy: + matrix: + python: ['3.8', '3.9'] + + env: + conda-pkgs: 'C:\Users\runneradmin\conda_pkgs_dir\' + conda-bld: 'C:\Miniconda3\envs\build\conda-bld\win-64\' + + steps: + - name: Checkout DPNP repo + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Checkout oneDPL + uses: actions/checkout@v3 + with: + repository: oneapi-src/oneDPL + path: oneDPL + ref: oneDPL-2021.7.0-release + + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python }} + miniconda-version: 'latest' + activate-environment: 'build' + use-only-tar-bz2: true + + - name: Cache conda packages + uses: actions/cache@v3 + env: + CACHE_NUMBER: 1 # Increase to reset cache + with: + path: ${{ env.conda-pkgs }} + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('**/meta.yaml') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + + - name: Install conda-build + run: conda install conda-build + + - name: Build conda package + run: conda build --no-test --python ${{ matrix.python }} -c dppy/label/dev -c intel -c defaults --override-channels conda-recipe + env: + DPLROOT: '%GITHUB_WORKSPACE%\oneDPL' + + - name: Upload artifact + uses: actions/upload-artifact@v2 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + path: ${{ env.conda-bld }}${{ env.PACKAGE_NAME }}-*.tar.bz2 + + upload_linux: + needs: build_linux + + if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 
'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')}} - upload: - needs: build - if: ${{ github.ref == 'refs/heads/master' }} runs-on: ubuntu-latest strategy: matrix: - python: [3.8, 3.9] + python: ['3.8', '3.9'] + + steps: + - name: Download artifact + uses: actions/download-artifact@v2 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python }} + miniconda-version: 'latest' + activate-environment: 'upload' + + - name: Install anaconda-client + run: conda install anaconda-client + + - name: Upload + env: + ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} + run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 + + upload_windows: + needs: build_windows + + if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')}} + + runs-on: windows-latest + + strategy: + matrix: + python: ['3.8', '3.9'] steps: - name: Download artifact uses: actions/download-artifact@v2 with: - name: dpnp ${{ runner.os }} ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python }} + miniconda-version: 'latest' + activate-environment: 'upload' - name: Install anaconda-client run: conda install anaconda-client - - name: Add conda to system path - run: echo $CONDA/bin >> $GITHUB_PATH - name: Upload env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} - run: | - conda install anaconda-client - anaconda --token $ANACONDA_TOKEN upload --user dppy --label dev dpnp-*.tar.bz2 + run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index 1695b2e74c90..6d4389b93dd0 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -1,4 +1,5 @@ REM A workaround for activate-dpcpp.bat issue to be addressed in 2021.4 +set "LIB=%BUILD_PREFIX%\Library\lib;%BUILD_PREFIX%\compiler\lib;%LIB%" SET "INCLUDE=%BUILD_PREFIX%\include;%INCLUDE%" IF DEFINED DPLROOT ( diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index bc705e2715d2..c24d86e5473b 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -8,16 +8,16 @@ requirements: host: - python - setuptools - - numpy 1.19 + - numpy >=1.19 - cython - - cmake 3.19 + - cmake >=3.19 - dpctl >=0.13 - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2021.1.1') }} - tbb-devel - wheel build: - - {{ compiler('dpcpp') }} - - dpcpp-cpp-rt {{ environ.get('DPCPP_VER', '>=2021.1.1') }} + - {{ compiler('cxx') }} + - {{ compiler('dpcpp') }} >=2022.1 # [not osx] run: - python - dpctl >=0.13 diff --git a/scripts/build_conda_package.sh b/scripts/build_conda_package.sh index c9ad065b1509..ae9474e1f773 100755 --- a/scripts/build_conda_package.sh +++ b/scripts/build_conda_package.sh @@ -5,7 +5,7 @@ DPLROOT=$2 export DPLROOT -CHANNELS="-c dppy/label/dev -c intel -c defaults --override-channels" +CHANNELS="-c dppy/label/dev -c defaults -c intel --override-channels" VERSIONS="--python $PYTHON_VERSION" TEST="--no-test" diff --git a/utils/dpnp_build_utils.py b/utils/dpnp_build_utils.py index d06096c4b086..2ccf211587d3 100644 --- 
a/utils/dpnp_build_utils.py +++ b/utils/dpnp_build_utils.py @@ -126,7 +126,7 @@ def find_cmplr(verbose=False): verbose=verbose) # try to find in Python environment - if not cmplr_include or not mathlib_path: + if not cmplr_include or not cmplr_libpath: if sys.platform in ['linux']: rel_include_path = os.path.join('include') rel_libdir_path = os.path.join('lib') From 31144c604277c6e98146bd8579f31c7e2a9d5535 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 8 Sep 2022 15:57:06 -0500 Subject: [PATCH 12/30] Attempt to fix workflow --- .github/workflows/conda-package.yml | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 00e05f687ea3..a2321b268e9d 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -162,13 +162,12 @@ jobs: miniconda-version: 'latest' activate-environment: 'upload' - - name: Install anaconda-client - run: conda install anaconda-client - - - name: Upload + - name: Install client and Upload env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} - run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 + run: | + conda install anaconda-client + anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 upload_windows: needs: build_windows @@ -194,10 +193,9 @@ jobs: miniconda-version: 'latest' activate-environment: 'upload' - - name: Install anaconda-client - run: conda install anaconda-client - - - name: Upload + - name: Install client and Upload env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} - run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 + run: | + conda install anaconda-client + anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 From ee65713c94c047731369e0051be3cb227b3b195d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 8 Sep 2022 16:52:28 -0500 Subject: [PATCH 13/30] attempt to fix upload steps of the workflow on Linux --- .github/workflows/conda-package.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index a2321b268e9d..23c36f29c4eb 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -45,6 +45,7 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true + auto-activate-base: true python-version: ${{ matrix.python }} miniconda-version: 'latest' activate-environment: 'build' @@ -106,6 +107,7 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true + auto-activate-base: true python-version: ${{ matrix.python }} miniconda-version: 'latest' activate-environment: 'build' @@ -158,6 +160,7 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true + auto-activate-base: true python-version: ${{ matrix.python }} miniconda-version: 'latest' activate-environment: 'upload' @@ -166,6 +169,7 @@ jobs: env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} run: | + conda activate upload conda install anaconda-client anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -189,6 +193,7 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true + auto-activate-base: true python-version: ${{ matrix.python }} miniconda-version: 'latest' 
activate-environment: 'upload' @@ -197,5 +202,6 @@ jobs: env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} run: | + conda activate upload conda install anaconda-client anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 From 2ea4aac5377d381228a8e59da1876c16cc551035 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 8 Sep 2022 19:52:57 -0500 Subject: [PATCH 14/30] Another attempt to fix upload step of conda-package workflow --- .github/workflows/conda-package.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 23c36f29c4eb..c87084600ebb 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -169,7 +169,10 @@ jobs: env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} run: | + source $CONDA/etc/profile.d/conda.sh + conda info conda activate upload + ls -lF $CONDA_PREFIX/bin conda install anaconda-client anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 From ac79e9fb7b8f405a2d26b943ca4bef0e318095ec Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Fri, 9 Sep 2022 13:52:56 +0200 Subject: [PATCH 15/30] Set default shell in upload actions (#1180) --- .github/workflows/conda-package.yml | 35 +++++++++++++++-------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index c87084600ebb..50b573a3e364 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -45,7 +45,6 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - auto-activate-base: true python-version: ${{ matrix.python }} miniconda-version: 'latest' activate-environment: 'build' @@ -107,7 +106,6 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - auto-activate-base: true python-version: ${{ matrix.python }} miniconda-version: 'latest' activate-environment: 'build' @@ -146,6 +144,10 @@ jobs: runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + strategy: matrix: python: ['3.8', '3.9'] @@ -160,21 +162,17 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - auto-activate-base: true python-version: ${{ matrix.python }} miniconda-version: 'latest' activate-environment: 'upload' - - name: Install client and Upload + - name: Install anaconda-client + run: conda install anaconda-client + + - name: Upload env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} - run: | - source $CONDA/etc/profile.d/conda.sh - conda info - conda activate upload - ls -lF $CONDA_PREFIX/bin - conda install anaconda-client - anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 + run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 upload_windows: needs: build_windows @@ -183,6 +181,10 @@ jobs: runs-on: windows-latest + defaults: + run: + shell: cmd /C CALL {0} + strategy: matrix: python: ['3.8', '3.9'] @@ -196,15 +198,14 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - auto-activate-base: true python-version: ${{ matrix.python }} miniconda-version: 'latest' activate-environment: 'upload' - - name: Install client and Upload + - name: Install anaconda-client + run: conda install anaconda-client + + - name: Upload env: 
ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} - run: | - conda activate upload - conda install anaconda-client - anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 + run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 From 64478ae66190eb0f6b1df5221edd919dfb1eb868 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 9 Sep 2022 07:14:39 -0500 Subject: [PATCH 16/30] Use pin_compatible for run-time dependency generation on numpy, restrict numpy version bracket for host section --- conda-recipe/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index c24d86e5473b..dccf855c184c 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -8,7 +8,7 @@ requirements: host: - python - setuptools - - numpy >=1.19 + - numpy >=1.19,<1.22a0 - cython - cmake >=3.19 - dpctl >=0.13 @@ -23,7 +23,7 @@ requirements: - dpctl >=0.13 - {{ pin_compatible('dpcpp-cpp-rt', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('mkl-dpcpp', min_pin='x.x', max_pin='x') }} - - numpy >=1.15 + - {{ pin_compatible('numpy', min_pin='x.x', max_pin='x') }} build: number: {{ GIT_DESCRIBE_NUMBER }} From bd1a414803a787f6368374fd72d9d074c447f269 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sat, 10 Sep 2022 15:51:29 +0200 Subject: [PATCH 17/30] Reorder channels in conda-build (#1182) * Reorder channels in conda-build * Remove conda-build script for Linux --- .github/workflows/conda-package.yml | 9 +++++---- scripts/build_conda_package.sh | 16 ---------------- 2 files changed, 5 insertions(+), 20 deletions(-) delete mode 100755 scripts/build_conda_package.sh diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 50b573a3e364..756c5f8709da 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -9,8 +9,7 @@ on: env: PACKAGE_NAME: dpnp MODULE_NAME: dpnp - VER_SCRIPT1: "import json; f = open('ver.json', 'r'); j = json.load(f); f.close(); " - VER_SCRIPT2: "d = j['dpnp'][0]; print('='.join((d[s] for s in ('version', 'build'))))" + CHANNELS: '-c dppy/label/dev -c intel -c defaults --override-channels' jobs: build_linux: @@ -66,7 +65,9 @@ jobs: run: conda install conda-build - name: Build conda package - run: ./scripts/build_conda_package.sh ${{ matrix.python }} $GITHUB_WORKSPACE/oneDPL + run: conda build --no-test --python ${{ matrix.python }} ${{ env.CHANNELS }} conda-recipe + env: + DPLROOT: '${{ github.workspace }}/oneDPL' - name: Upload artifact uses: actions/upload-artifact@v2 @@ -127,7 +128,7 @@ jobs: run: conda install conda-build - name: Build conda package - run: conda build --no-test --python ${{ matrix.python }} -c dppy/label/dev -c intel -c defaults --override-channels conda-recipe + run: conda build --no-test --python ${{ matrix.python }} ${{ env.CHANNELS }} conda-recipe env: DPLROOT: '%GITHUB_WORKSPACE%\oneDPL' diff --git a/scripts/build_conda_package.sh b/scripts/build_conda_package.sh deleted file mode 100755 index ae9474e1f773..000000000000 --- a/scripts/build_conda_package.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -PYTHON_VERSION=$1 -DPLROOT=$2 - -export DPLROOT - -CHANNELS="-c dppy/label/dev -c defaults -c intel --override-channels" -VERSIONS="--python $PYTHON_VERSION" -TEST="--no-test" - -conda build \ - $TEST \ - $VERSIONS \ - $CHANNELS \ - conda-recipe From 
3be4b2e1d0ce7399761f2f4d0af6cb92ce2271fe Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 20 Sep 2022 20:56:43 +0200 Subject: [PATCH 18/30] Add tests running as a part of github actions (#1184) --- .github/workflows/conda-package.yml | 285 +++++++++++++++++++++++++++- tests/skipped_tests.tbl | 1 + 2 files changed, 281 insertions(+), 5 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 756c5f8709da..83045bcd8f8d 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -10,6 +10,9 @@ env: PACKAGE_NAME: dpnp MODULE_NAME: dpnp CHANNELS: '-c dppy/label/dev -c intel -c defaults --override-channels' + VER_JSON_NAME: 'version.json' + VER_SCRIPT1: "import json; f = open('version.json', 'r'); j = json.load(f); f.close(); " + VER_SCRIPT2: "d = j['dpnp'][0]; print('='.join((d[s] for s in ('version', 'build'))))" jobs: build_linux: @@ -49,6 +52,9 @@ jobs: activate-environment: 'build' use-only-tar-bz2: true + - name: Install conda-build + run: conda install conda-build + - name: Cache conda packages uses: actions/cache@v3 env: @@ -61,9 +67,6 @@ jobs: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - - name: Install conda-build - run: conda install conda-build - - name: Build conda package run: conda build --no-test --python ${{ matrix.python }} ${{ env.CHANNELS }} conda-recipe env: @@ -138,9 +141,281 @@ jobs: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.conda-bld }}${{ env.PACKAGE_NAME }}-*.tar.bz2 - upload_linux: + test_linux: needs: build_linux + runs-on: ubuntu-latest + + defaults: + run: + shell: bash -l {0} + + strategy: + matrix: + python: ['3.8', '3.9'] + dpctl: ['0.13.0'] + experimental: [false] + + continue-on-error: ${{ matrix.experimental }} + + env: + conda-pkgs: '/home/runner/conda_pkgs_dir/' + channel-path: '${{ github.workspace }}/channel/' + pkg-path-in-channel: '${{ github.workspace }}/channel/linux-64/' + extracted-pkg-path: '${{ github.workspace }}/pkg/' + tests-path: '${{ github.workspace }}/pkg/info/test/' + ver-json-path: '${{ github.workspace }}/version.json' + + steps: + - name: Download artifact + uses: actions/download-artifact@v2 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + path: ${{ env.pkg-path-in-channel }} + + - name: Extract package archive + run: | + mkdir -p ${{ env.extracted-pkg-path }} + tar -xvf ${{ env.pkg-path-in-channel }}/${{ env.PACKAGE_NAME }}-*.tar.bz2 -C ${{ env.extracted-pkg-path }} + + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python }} + miniconda-version: 'latest' + activate-environment: 'test' + + # Needed to be able to run conda index + - name: Install conda-build + run: conda install conda-build + + - name: Create conda channel + run: conda index ${{ env.channel-path }} + + - name: Test conda channel + run: | + conda search ${{ env.PACKAGE_NAME }} -c ${{ env.channel-path }} --override-channels --info --json > ${{ env.ver-json-path }} + cat ${{ env.ver-json-path }} + + - name: Collect dependencies + run: | + export PACKAGE_VERSION=$(python -c "${{ env.VER_SCRIPT1 }} ${{ env.VER_SCRIPT2 }}") + echo PACKAGE_VERSION=${PACKAGE_VERSION} + + conda install ${{ env.PACKAGE_NAME }}=${PACKAGE_VERSION} python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} --only-deps --dry-run > 
lockfile + cat lockfile + env: + TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' + + - name: Cache conda packages + uses: actions/cache@v3 + env: + CACHE_NUMBER: 1 # Increase to reset cache + with: + path: ${{ env.conda-pkgs }} + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + + - name: Install dpnp + run: | + export PACKAGE_VERSION=$(python -c "${{ env.VER_SCRIPT1 }} ${{ env.VER_SCRIPT2 }}") + echo PACKAGE_VERSION=${PACKAGE_VERSION} + + conda install ${{ env.PACKAGE_NAME }}=${PACKAGE_VERSION} dpctl=${{ matrix.dpctl }} pytest python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} + env: + TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' + + - name: List installed packages + run: conda list + + - name: Smoke test + run: python -c "import dpnp, dpctl; dpctl.lsplatform()" + + # TODO: run the whole scope once the issues on CPU are resolved + - name: Run tests + run: python -m pytest -q -ra --disable-warnings -vv tests/test_arraycreation.py tests/test_dparray.py tests/test_mathematical.py + env: + SYCL_ENABLE_HOST_DEVICE: '1' + working-directory: ${{ env.tests-path }} + + test_windows: + needs: build_windows + + runs-on: windows-latest + + defaults: + run: + shell: cmd /C CALL {0} + + strategy: + matrix: + python: ['3.8', '3.9'] + dpctl: ['0.13.0'] + experimental: [false] + + continue-on-error: ${{ matrix.experimental }} + + env: + conda-pkgs: 'C:\Users\runneradmin\conda_pkgs_dir\' + channel-path: '${{ github.workspace }}\channel\' + pkg-path-in-channel: '${{ github.workspace }}\channel\win-64\' + extracted-pkg-path: '${{ github.workspace }}\pkg' + tests-path: '${{ github.workspace }}\pkg\info\test\' + ver-json-path: '${{ github.workspace }}\version.json' + active-env-name: 'test' + miniconda-lib-path: 'C:\Miniconda3\envs\test\Library\lib\' + miniconda-bin-path: 'C:\Miniconda3\envs\test\Library\bin\' + + steps: + - name: Download artifact + uses: actions/download-artifact@v2 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + path: ${{ env.pkg-path-in-channel }} + + - name: Extract package archive + run: | + @echo on + mkdir -p ${{ env.extracted-pkg-path }} + + set SEARCH_SCRIPT="DIR ${{ env.pkg-path-in-channel }} /s/b | FINDSTR /r "dpnp-.*\.tar\.bz2"" + FOR /F "tokens=* USEBACKQ" %%F IN (`%SEARCH_SCRIPT%`) DO ( + SET FULL_PACKAGE_PATH=%%F + ) + echo FULL_PACKAGE_PATH: %FULL_PACKAGE_PATH% + + python -c "import shutil; shutil.unpack_archive(r\"%FULL_PACKAGE_PATH%\", extract_dir=r\"${{ env.extracted-pkg-path }}\")" + dir ${{ env.extracted-pkg-path }} + + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python }} + miniconda-version: 'latest' + activate-environment: ${{ env.active-env-name }} + + # Needed to be able to run conda index + - name: Install conda-build + run: conda install conda-build + + - name: Create conda channel + run: conda index ${{ env.channel-path }} + + - name: Test conda channel + run: | + @echo on + conda search ${{ env.PACKAGE_NAME }} -c ${{ env.channel-path }} --override-channels --info --json > ${{ env.ver-json-path }} + + - name: Dump version.json + run: more ${{ env.ver-json-path }} + + - name: Collect dependencies + run: | + @echo on + set "SCRIPT=${{ env.VER_SCRIPT1 }} ${{ env.VER_SCRIPT2 }}" + FOR /F "tokens=* 
USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( + SET PACKAGE_VERSION=%%F + ) + echo PACKAGE_VERSION: %PACKAGE_VERSION% + + conda install ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% dpctl=${{ matrix.dpctl }} python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} --only-deps --dry-run > lockfile + env: + TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' + + - name: Dump lockfile + run: more lockfile + + - name: Cache conda packages + uses: actions/cache@v3 + env: + CACHE_NUMBER: 1 # Increase to reset cache + with: + path: ${{ env.conda-pkgs }} + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + + - name: Install opencl_rt + run: conda install opencl_rt -c intel --override-channels + + - name: Install dpnp + run: | + @echo on + set "SCRIPT=${{ env.VER_SCRIPT1 }} ${{ env.VER_SCRIPT2 }}" + FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( + SET PACKAGE_VERSION=%%F + ) + echo PACKAGE_VERSION: %PACKAGE_VERSION% + + conda install ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% dpctl=${{ matrix.dpctl }} pytest python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} + env: + TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' + + - name: List installed packages + run: conda list + + - name: Add library + shell: pwsh + run: | + # Make sure the below libraries exist + Get-Item -Path ${{ env.miniconda-bin-path }}\OpenCL.dll + Get-Item -Path ${{ env.miniconda-lib-path }}\intelocl64.dll + + echo "OCL_ICD_FILENAMES=${{ env.miniconda-lib-path }}\intelocl64.dll" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()} + + if ($list.count -eq 0) { + if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos)) { + New-Item -Path HKLM:\SOFTWARE\Khronos + } + + if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL)) { + New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL + } + + if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors)) { + New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors + } + + New-ItemProperty -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors -Name ${{ env.miniconda-lib-path }}\intelocl64.dll -Value 0 + try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()} + Write-Output $(Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors) + + # Now copy OpenCL.dll into system folder + $system_ocl_icd_loader="C:\Windows\System32\OpenCL.dll" + $python_ocl_icd_loader="${{ env.miniconda-bin-path }}\OpenCL.dll" + Copy-Item -Path $python_ocl_icd_loader -Destination $system_ocl_icd_loader + + if (Test-Path -Path $system_ocl_icd_loader) { + Write-Output "$system_ocl_icd_loader has been copied" + $acl = Get-Acl $system_ocl_icd_loader + Write-Output $acl + } else { + Write-Output "OCL-ICD-Loader was not copied" + } + + # Variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default + echo "TBB_DLL_PATH=${{ env.miniconda-bin-path }}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + } + + - name: Smoke test + run: python -c "import dpnp, dpctl; dpctl.lsplatform()" + + # TODO: run the whole scope once the issues on CPU are resolved + - name: Run tests + run: python -m pytest -q -ra --disable-warnings -vv tests\test_arraycreation.py 
tests\test_dparray.py tests\test_mathematical.py + working-directory: ${{ env.tests-path }} + + upload_linux: + needs: test_linux + if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')}} runs-on: ubuntu-latest @@ -176,7 +451,7 @@ jobs: run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 upload_windows: - needs: build_windows + needs: test_windows if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')}} diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index bbf3c1c3b535..b781e3772021 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -129,6 +129,7 @@ tests/test_linalg.py::test_svd[(2,2)-complex128] tests/test_linalg.py::test_svd[(3,4)-complex128] tests/test_linalg.py::test_svd[(5,3)-complex128] tests/test_linalg.py::test_svd[(16,16)-complex128] +tests/test_mathematical.py::TestGradient::test_gradient_y1_dx[3.5-array1] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp.asarray([(i, i) for i in x], [("a", int), ("b", int)]).view(dpnp.recarray))] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)] From 614c829d8fdd1311fee35464c869f113b2c8dd29 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 21 Sep 2022 11:23:02 +0200 Subject: [PATCH 19/30] [Build] setuptools 63.4.1 breaks build for Windows (#1185) * [SAT-5366] setuptools 63.4.1 breaks build for Windows * Add TODO note as suggested in review comment --- conda-recipe/bld.bat | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index 6d4389b93dd0..8ec6c1fb1587 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -2,6 +2,13 @@ REM A workaround for activate-dpcpp.bat issue to be addressed in 2021.4 set "LIB=%BUILD_PREFIX%\Library\lib;%BUILD_PREFIX%\compiler\lib;%LIB%" SET "INCLUDE=%BUILD_PREFIX%\include;%INCLUDE%" +REM Since the 60.0.0 release, setuptools includes a local, vendored copy +REM of distutils (from late copies of CPython) that is enabled by default. +REM It breaks build for Windows, so use distutils from "stdlib" as before. +REM @TODO: remove the setting, once transition to build backend on Windows +REM to cmake is complete. +SET "SETUPTOOLS_USE_DISTUTILS=stdlib" + IF DEFINED DPLROOT ( ECHO "Sourcing DPLROOT" SET "INCLUDE=%DPLROOT%\include;%INCLUDE%" From e3e04c75d485be777663d388a169b39fd0ed402b Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Wed, 21 Sep 2022 16:32:40 -0500 Subject: [PATCH 20/30] Add extra information on how to build dpdnp from source. --- README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/README.md b/README.md index bf9dfd5ff27e..a26125fa35fe 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,35 @@ DPNP_QUEUE_GPU=1 python examples/example1.py ``` ## Build from source: + Ensure you have the following prerequisite packages installed: + +- `mkl-devel-dpcpp` +- `dpcpp_linux-64` or `dpcpp_win-64` (depending on your OS) +- `tbb-devel` +- `dpctl` + +In addition, you need oneDPL installed on your system. 
There are two ways to do +so: + +1. Install oneAPI and run the oneDPL activation script. E.g., on linux: + + ```bash + source /opt/intel/oneapi/dpl/latest/env/vars.sh + ``` + +2. Clone dpl from https://github.com/oneapi-src/oneDPL and set the `DPL_ROOT` + environment variable to point to the `include` directory in the repository. + + E.g., on linux + + ```bash + git clone https://github.com/oneapi-src/oneDPL + export DPL_ROOT=$(pwd)/oneDPL/include + ``` + +After these steps, `dpnp` can be built in debug mode as follows: + + ```bash git clone https://github.com/IntelPython/dpnp cd dpnp From efa1a195850d65bf8acd22a5b3daee432e090e5f Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 29 Sep 2022 09:13:10 +0200 Subject: [PATCH 21/30] Fix import dpnp on Win & python 3.10 (#1189) * Fix import dpnp on Win & python 3.10 * PATH is used by python till 3.10 * Update dpnp/__init__.py Co-authored-by: Oleksandr Pavlyk * Update dpnp/__init__.py Co-authored-by: Oleksandr Pavlyk * Update dpnp/__init__.py Co-authored-by: Oleksandr Pavlyk * Update dpnp/__init__.py Co-authored-by: Oleksandr Pavlyk * Apply review comments Co-authored-by: Oleksandr Pavlyk --- dpnp/__init__.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/dpnp/__init__.py b/dpnp/__init__.py index 843c3c111a6b..5fbe1f05864d 100644 --- a/dpnp/__init__.py +++ b/dpnp/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -30,7 +30,16 @@ import dpctl dpctlpath = os.path.dirname(dpctl.__file__) -os.environ["PATH"] += os.pathsep + mypath + os.pathsep + dpctlpath +# For Windows OS with Python >= 3.7, it is required to explicitly define a path +# where to search for DLLs towards both DPNP backend and DPCTL Sycl interface, +# otherwise DPNP import will be failing. This is because the libraries +# are not installed under any of default paths where Python is searching. 
+from platform import system +if system() == 'Windows': + if hasattr(os, "add_dll_directory"): + os.add_dll_directory(mypath) + os.add_dll_directory(dpctlpath) + os.environ["PATH"] = os.pathsep.join([os.getenv("PATH", ""), mypath, dpctlpath]) from dpnp.dpnp_array import dpnp_array as ndarray From da21159445e904af4570e3364ec5735a9517d42e Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 6 Oct 2022 17:39:03 +0200 Subject: [PATCH 22/30] Missing path towards TBB DLLs on Windows (#1195) --- .github/workflows/conda-package.yml | 56 ++++++++++++++++------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 83045bcd8f8d..6aa9cb16151f 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -28,7 +28,6 @@ jobs: env: conda-pkgs: '/home/runner/conda_pkgs_dir/' - conda-bld: '/usr/share/miniconda3/envs/build/conda-bld/linux-64/' steps: - name: Checkout DPNP repo @@ -52,6 +51,9 @@ jobs: activate-environment: 'build' use-only-tar-bz2: true + - name: Store conda paths as envs + run: echo "CONDA_BLD=$CONDA_PREFIX/conda-bld/linux-64/" >> $GITHUB_ENV + - name: Install conda-build run: conda install conda-build @@ -76,7 +78,7 @@ jobs: uses: actions/upload-artifact@v2 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - path: ${{ env.conda-bld }}${{ env.PACKAGE_NAME }}-*.tar.bz2 + path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 build_windows: runs-on: windows-latest @@ -91,7 +93,6 @@ jobs: env: conda-pkgs: 'C:\Users\runneradmin\conda_pkgs_dir\' - conda-bld: 'C:\Miniconda3\envs\build\conda-bld\win-64\' steps: - name: Checkout DPNP repo @@ -115,6 +116,11 @@ jobs: activate-environment: 'build' use-only-tar-bz2: true + - name: Store conda paths as envs + run: | + @echo on + (echo CONDA_BLD=%CONDA_PREFIX%\conda-bld\win-64\) >> %GITHUB_ENV% + - name: Cache conda packages uses: actions/cache@v3 env: @@ -139,7 +145,7 @@ jobs: uses: actions/upload-artifact@v2 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - path: ${{ env.conda-bld }}${{ env.PACKAGE_NAME }}-*.tar.bz2 + path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 test_linux: needs: build_linux @@ -201,7 +207,9 @@ jobs: - name: Collect dependencies run: | export PACKAGE_VERSION=$(python -c "${{ env.VER_SCRIPT1 }} ${{ env.VER_SCRIPT2 }}") + echo PACKAGE_VERSION=${PACKAGE_VERSION} + echo "PACKAGE_VERSION=$PACKAGE_VERSION" >> $GITHUB_ENV conda install ${{ env.PACKAGE_NAME }}=${PACKAGE_VERSION} python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} --only-deps --dry-run > lockfile cat lockfile @@ -221,11 +229,7 @@ jobs: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - name: Install dpnp - run: | - export PACKAGE_VERSION=$(python -c "${{ env.VER_SCRIPT1 }} ${{ env.VER_SCRIPT2 }}") - echo PACKAGE_VERSION=${PACKAGE_VERSION} - - conda install ${{ env.PACKAGE_NAME }}=${PACKAGE_VERSION} dpctl=${{ matrix.dpctl }} pytest python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} + run: conda install ${{ env.PACKAGE_NAME }}=${{ env.PACKAGE_VERSION }} dpctl=${{ matrix.dpctl }} pytest python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} env: TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' @@ -267,8 +271,6 @@ jobs: tests-path: '${{ github.workspace }}\pkg\info\test\' ver-json-path: '${{ github.workspace }}\version.json' active-env-name: 'test' - miniconda-lib-path: 'C:\Miniconda3\envs\test\Library\lib\' - 
miniconda-bin-path: 'C:\Miniconda3\envs\test\Library\bin\' steps: - name: Download artifact @@ -299,6 +301,12 @@ jobs: miniconda-version: 'latest' activate-environment: ${{ env.active-env-name }} + - name: Store conda paths as envs + run: | + @echo on + (echo CONDA_LIB_PATH=%CONDA_PREFIX%\Library\lib\) >> %GITHUB_ENV% + (echo CONDA_LIB_BIN_PATH=%CONDA_PREFIX%\Library\bin\) >> %GITHUB_ENV% + # Needed to be able to run conda index - name: Install conda-build run: conda install conda-build @@ -322,6 +330,7 @@ jobs: SET PACKAGE_VERSION=%%F ) echo PACKAGE_VERSION: %PACKAGE_VERSION% + (echo PACKAGE_VERSION=%PACKAGE_VERSION%) >> %GITHUB_ENV% conda install ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% dpctl=${{ matrix.dpctl }} python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} --only-deps --dry-run > lockfile env: @@ -348,13 +357,7 @@ jobs: - name: Install dpnp run: | @echo on - set "SCRIPT=${{ env.VER_SCRIPT1 }} ${{ env.VER_SCRIPT2 }}" - FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( - SET PACKAGE_VERSION=%%F - ) - echo PACKAGE_VERSION: %PACKAGE_VERSION% - - conda install ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% dpctl=${{ matrix.dpctl }} pytest python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} + conda install ${{ env.PACKAGE_NAME }}=${{ env.PACKAGE_VERSION }} dpctl=${{ matrix.dpctl }} pytest python=${{ matrix.python }} ${{ env.TEST_CHANNELS }} env: TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' @@ -365,10 +368,10 @@ jobs: shell: pwsh run: | # Make sure the below libraries exist - Get-Item -Path ${{ env.miniconda-bin-path }}\OpenCL.dll - Get-Item -Path ${{ env.miniconda-lib-path }}\intelocl64.dll + Get-Item -Path "$env:CONDA_LIB_BIN_PATH\OpenCL.dll" + Get-Item -Path "$env:CONDA_LIB_PATH\intelocl64.dll" - echo "OCL_ICD_FILENAMES=${{ env.miniconda-lib-path }}\intelocl64.dll" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + echo "OCL_ICD_FILENAMES = $env:CONDA_LIB_PATH\intelocl64.dll" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()} if ($list.count -eq 0) { @@ -384,13 +387,13 @@ jobs: New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors } - New-ItemProperty -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors -Name ${{ env.miniconda-lib-path }}\intelocl64.dll -Value 0 + New-ItemProperty -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors -Name "$env:CONDA_LIB_PATH\intelocl64.dll" -Value 0 try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()} Write-Output $(Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors) # Now copy OpenCL.dll into system folder $system_ocl_icd_loader="C:\Windows\System32\OpenCL.dll" - $python_ocl_icd_loader="${{ env.miniconda-bin-path }}\OpenCL.dll" + $python_ocl_icd_loader="$env:CONDA_LIB_BIN_PATH\OpenCL.dll" Copy-Item -Path $python_ocl_icd_loader -Destination $system_ocl_icd_loader if (Test-Path -Path $system_ocl_icd_loader) { @@ -401,8 +404,11 @@ jobs: Write-Output "OCL-ICD-Loader was not copied" } - # Variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default - echo "TBB_DLL_PATH=${{ env.miniconda-bin-path }}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + # Configuration variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default + $cl_cfg="$env:CONDA_LIB_PATH\cl.cfg" + Write-Output "`n>>> Dump content of $cl_cfg`n" 
(Get-Content $cl_cfg) "`n<<< end of dump`n" + (Get-Content $cl_cfg) -replace '^CL_CONFIG_TBB_DLL_PATH =.*', "CL_CONFIG_TBB_DLL_PATH = $env:CONDA_LIB_BIN_PATH" | Set-Content $cl_cfg + Write-Output "`n>>> Dump content of modified $cl_cfg`n" (Get-Content $cl_cfg) "`n<<< end of dump`n" } - name: Smoke test From 63ff52418d6b603876add6405059d51f224cf19c Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Fri, 7 Oct 2022 14:42:11 +0200 Subject: [PATCH 23/30] Fix missing support of compute follows data in dpnp.erf (#1194) * Fix support of compute follows data in dpnp.erf * Include tests for dpnp.erf in validation with github action --- .github/workflows/conda-package.yml | 8 ++++---- dpnp/dpnp_iface_libmath.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 6aa9cb16151f..798359357183 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -169,7 +169,7 @@ jobs: channel-path: '${{ github.workspace }}/channel/' pkg-path-in-channel: '${{ github.workspace }}/channel/linux-64/' extracted-pkg-path: '${{ github.workspace }}/pkg/' - tests-path: '${{ github.workspace }}/pkg/info/test/' + tests-path: '${{ github.workspace }}/pkg/info/test/tests/' ver-json-path: '${{ github.workspace }}/version.json' steps: @@ -241,7 +241,7 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests - run: python -m pytest -q -ra --disable-warnings -vv tests/test_arraycreation.py tests/test_dparray.py tests/test_mathematical.py + run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_mathematical.py test_special.py env: SYCL_ENABLE_HOST_DEVICE: '1' working-directory: ${{ env.tests-path }} @@ -268,7 +268,7 @@ jobs: channel-path: '${{ github.workspace }}\channel\' pkg-path-in-channel: '${{ github.workspace }}\channel\win-64\' extracted-pkg-path: '${{ github.workspace }}\pkg' - tests-path: '${{ github.workspace }}\pkg\info\test\' + tests-path: '${{ github.workspace }}\pkg\info\test\tests\' ver-json-path: '${{ github.workspace }}\version.json' active-env-name: 'test' @@ -416,7 +416,7 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests - run: python -m pytest -q -ra --disable-warnings -vv tests\test_arraycreation.py tests\test_dparray.py tests\test_mathematical.py + run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_mathematical.py test_special.py working-directory: ${{ env.tests-path }} upload_linux: diff --git a/dpnp/dpnp_iface_libmath.py b/dpnp/dpnp_iface_libmath.py index 934f420bd68b..6ef787487afa 100644 --- a/dpnp/dpnp_iface_libmath.py +++ b/dpnp/dpnp_iface_libmath.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -77,7 +77,7 @@ def erf(in_array1): """ - x1_desc = dpnp.get_dpnp_descriptor(in_array1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(in_array1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_erf(x1_desc).get_pyobj() From 9830f99641780963aae6c30045c236648733d00a Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Fri, 7 Oct 2022 16:57:20 +0200 Subject: [PATCH 24/30] Fix github action for building docs (#1191) * Fix github action for building docs * Get rid of pinning for cmake --- .github/workflows/build-sphinx.yml | 66 +++++++++++++++++++---------- .github/workflows/conda-package.yml | 22 ++++++++-- .gitignore | 6 +++ doc/conf.py | 4 +- doc/reference/fft.rst | 10 +++-- doc/reference/ndarray.rst | 2 +- doc/reference/polynomials.rst | 12 +++--- 7 files changed, 83 insertions(+), 39 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 0c0f220d59a6..16de6e2d480e 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -3,16 +3,27 @@ on: push: branches: - master + pull_request: jobs: build-and-deploy: name: Build and Deploy Docs + runs-on: ubuntu-20.04 + + defaults: + run: + shell: bash -l {0} + + env: + python-ver: '3.9' + steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.6.0 + uses: styfle/cancel-workflow-action@0.10.0 with: access_token: ${{ github.token }} + - name: Install Intel repository run: | wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB @@ -20,17 +31,20 @@ jobs: rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" sudo apt-get update + - name: Update libstdc++-dev run: | sudo apt remove -y gcc-7 g++-7 gcc-8 g++-8 gcc-10 g++-10 sudo apt remove -y libstdc++-10-dev sudo apt autoremove sudo apt install --reinstall -y gcc-9 g++-9 libstdc++-9-dev + - name: Install Intel OneAPI run: | sudo apt-get install intel-oneapi-mkl \ intel-oneapi-mkl-devel \ intel-oneapi-dpcpp-cpp-compiler + # https://github.com/marketplace/actions/checkout - name: Install nvidia-cuda support drivers run: | @@ -38,54 +52,60 @@ jobs: sudo apt-get update sudo apt-get install -y libnvidia-gl-450 sudo apt-get install -y nvidia-cuda-toolkit clinfo + - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 + # https://github.com/marketplace/actions/setup-miniconda - name: Setup miniconda uses: conda-incubator/setup-miniconda@v2 with: - activate-environment: dpnp - python-version: 3.8 - channels: intel,conda-forge - auto-activate-base: false - - name: Conda info - shell: bash -l {0} - run: | - conda info - conda list + auto-update-conda: true + python-version: ${{ env.python-ver }} + miniconda-version: 'latest' + activate-environment: 'docs' + channels: intel, conda-forge + - name: Install sphinx dependencies - shell: bash -l {0} - run: | - conda install sphinx sphinx_rtd_theme + run: conda install sphinx sphinx_rtd_theme + - name: Install dpnp dependencies - shell: bash -l {0} run: | - conda install dpctl mkl-devel-dpcpp tbb-devel dpcpp_linux-64 cmake=3.19 cython pytest \ + conda install dpctl mkl-devel-dpcpp tbb-devel dpcpp_linux-64 cmake cython pytest \ -c dppy/label/dev -c intel -c conda-forge + - name: Install cuPy dependencies - shell: bash -l {0} + run: conda install -c conda-forge cupy cudatoolkit=10.0 + + - 
name: Conda info run: | - conda install -c conda-forge cupy cudatoolkit=10.0 + conda info + conda list + - name: Build library - shell: bash -l {0} run: | DPLROOT=/opt/intel/oneapi/dpl/latest python setup.py build_clib CC=dpcpp python setup.py build_ext --inplace python setup.py develop + - name: Build docs - shell: bash -l {0} - run: | - make html + run: make html working-directory: doc + # https://github.com/marketplace/actions/doxygen-action - name: Build backend docs - uses: mattnotmitt/doxygen-action@v1 + uses: mattnotmitt/doxygen-action@v1.9.4 with: working-directory: 'dpnp/backend/doc' + - name: Copy backend docs run: cp -r dpnp/backend/doc/html doc/_build/html/backend_doc + # https://github.com/marketplace/actions/github-pages-action - name: Deploy docs + if: | + !github.event.pull_request.head.repo.fork && + (github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')) uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 798359357183..fb307a54031b 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -30,6 +30,11 @@ jobs: conda-pkgs: '/home/runner/conda_pkgs_dir/' steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.10.0 + with: + access_token: ${{ github.token }} + - name: Checkout DPNP repo uses: actions/checkout@v3 with: @@ -95,6 +100,11 @@ jobs: conda-pkgs: 'C:\Users\runneradmin\conda_pkgs_dir\' steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.10.0 + with: + access_token: ${{ github.token }} + - name: Checkout DPNP repo uses: actions/checkout@v3 with: @@ -422,7 +432,9 @@ jobs: upload_linux: needs: test_linux - if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')}} + if: | + !github.event.pull_request.head.repo.fork && + (github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')) runs-on: ubuntu-latest @@ -452,14 +464,16 @@ jobs: run: conda install anaconda-client - name: Upload + run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} - run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 upload_windows: needs: test_windows - if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')}} + if: | + !github.event.pull_request.head.repo.fork && + (github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')) runs-on: windows-latest @@ -488,6 +502,6 @@ jobs: run: conda install anaconda-client - name: Upload + run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} - run: anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2 diff --git a/.gitignore b/.gitignore index ea56758f290b..fda4c1635310 100644 --- a/.gitignore 
+++ b/.gitignore @@ -8,6 +8,12 @@ __pycache__/ # Code project files .vscode +# Files from test of code coverage +coverage.xml + +# Backup files kept after git merge/rebase +*.orig + *dpnp_backend* dpnp/**/*.cpython*.so dpnp/**/*.pyd diff --git a/doc/conf.py b/doc/conf.py index cd7c9a1002e2..34ccf03a25e2 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -27,7 +27,7 @@ # -- Project information ----------------------------------------------------- project = 'dpnp' -copyright = '2020, Intel' +copyright = '2020-2022, Intel' author = 'Intel' # The short X.Y version @@ -73,7 +73,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/doc/reference/fft.rst b/doc/reference/fft.rst index 0d3b0999b71e..2af0ee47793d 100644 --- a/doc/reference/fft.rst +++ b/doc/reference/fft.rst @@ -57,7 +57,9 @@ Helper routines dpnp.fft.rfftfreq dpnp.fft.fftshift dpnp.fft.ifftshift - dpnp.fft.config.set_cufft_callbacks - dpnp.fft.config.set_cufft_gpus - dpnp.fft.config.get_plan_cache - dpnp.fft.config.show_plan_cache_info + + .. fft.config module is not implemented yet + .. dpnp.fft.config.set_cufft_callbacks + .. dpnp.fft.config.set_cufft_gpus + .. dpnp.fft.config.get_plan_cache + .. dpnp.fft.config.show_plan_cache_info diff --git a/doc/reference/ndarray.rst b/doc/reference/ndarray.rst index 670c312224d5..5fca216ec94c 100644 --- a/doc/reference/ndarray.rst +++ b/doc/reference/ndarray.rst @@ -11,4 +11,4 @@ For the basic concept of ``ndarray``\s, please refer to the `NumPy documentation :nosignatures: dpnp.ndarray - dpnp.dparray.dparray + dpnp.dpnp_array.dpnp_array diff --git a/doc/reference/polynomials.rst b/doc/reference/polynomials.rst index ad20cc6ad4bc..d86cedfe49a1 100644 --- a/doc/reference/polynomials.rst +++ b/doc/reference/polynomials.rst @@ -13,8 +13,9 @@ Polynomial Module :toctree: generated/ :nosignatures: - dpnp.polynomial.polynomial.polyvander - dpnp.polynomial.polynomial.polycompanion + .. polynomial module is not implemented yet + .. dpnp.polynomial.polynomial.polyvander + .. dpnp.polynomial.polynomial.polycompanion Polyutils @@ -24,9 +25,10 @@ Polyutils :toctree: generated/ :nosignatures: - dpnp.polynomial.polyutils.as_series - dpnp.polynomial.polyutils.trimseq - dpnp.polynomial.polyutils.trimcoef + .. polyutils module is not implemented yet + .. dpnp.polynomial.polyutils.as_series + .. dpnp.polynomial.polyutils.trimseq + .. 
dpnp.polynomial.polyutils.trimcoef Poly1d From 95d47fda0300ef87eacf64070fbf81631390815b Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 10 Oct 2022 16:56:57 +0200 Subject: [PATCH 25/30] Setting version to 0.10.2 (#1196) --- doc/conf.py | 2 +- dpnp/backend/CMakeLists.txt | 4 ++-- dpnp/backend/doc/Doxyfile | 2 +- dpnp/version.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 34ccf03a25e2..3ef5305c8578 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -33,7 +33,7 @@ # The short X.Y version version = '0.10' # The full version, including alpha/beta/rc tags -release = '0.10.1' +release = '0.10.2' # -- General configuration --------------------------------------------------- diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 109689d408a8..ba982fcd2b99 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -1,5 +1,5 @@ # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -27,7 +27,7 @@ cmake_minimum_required(VERSION 3.10 FATAL_ERROR) -# set(DPNP_VERSION 0.10.0) +# set(DPNP_VERSION 0.10.2) # set(DPNP_API_VERSION 0.10) # set directory where the custom finders live diff --git a/dpnp/backend/doc/Doxyfile b/dpnp/backend/doc/Doxyfile index 6c7439a25c3a..e4f1531ba2b4 100644 --- a/dpnp/backend/doc/Doxyfile +++ b/dpnp/backend/doc/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "DPNP C++ backend kernel library" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 0.10.1 +PROJECT_NUMBER = 0.10.2 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/dpnp/version.py b/dpnp/version.py index 9e0bcf9c1836..65515107f2a5 100644 --- a/dpnp/version.py +++ b/dpnp/version.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -29,6 +29,6 @@ DPNP version module """ -__version__: str = '0.10.1' +__version__: str = '0.10.2' version: str = __version__ From cfb62a6459720b5c934d1c1b87fc8f724a0c4e46 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 17 Oct 2022 15:46:06 +0200 Subject: [PATCH 26/30] Test runs to update to node 16 (#1198) --- .github/workflows/build-sphinx.yml | 8 +++--- .github/workflows/conda-package.yml | 44 ++++++++++++++--------------- .github/workflows/pre-commit.yml | 8 ++++-- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 16de6e2d480e..52623a587625 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -20,7 +20,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 + uses: styfle/cancel-workflow-action@0.11.0 with: access_token: ${{ github.token }} @@ -54,11 +54,11 @@ jobs: sudo apt-get install -y nvidia-cuda-toolkit clinfo - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v3.1.0 # https://github.com/marketplace/actions/setup-miniconda - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: auto-update-conda: true python-version: ${{ env.python-ver }} @@ -106,7 +106,7 @@ jobs: if: | !github.event.pull_request.head.repo.fork && (github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')) - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@v3.8.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: doc/_build/html/ diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index fb307a54031b..39867a86bb13 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -31,24 +31,24 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 + uses: styfle/cancel-workflow-action@0.11.0 with: access_token: ${{ github.token }} - name: Checkout DPNP repo - uses: actions/checkout@v3 + uses: actions/checkout@v3.1.0 with: fetch-depth: 0 - name: Checkout oneDPL - uses: actions/checkout@v3 + uses: actions/checkout@v3.1.0 with: repository: oneapi-src/oneDPL path: oneDPL ref: oneDPL-2021.7.0-release - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -63,7 +63,7 @@ jobs: run: conda install conda-build - name: Cache conda packages - uses: actions/cache@v3 + uses: actions/cache@v3.0.11 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -80,7 +80,7 @@ jobs: DPLROOT: '${{ github.workspace }}/oneDPL' - name: Upload artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3.1.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -101,24 +101,24 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 + uses: styfle/cancel-workflow-action@0.11.0 with: access_token: ${{ github.token }} - name: Checkout DPNP repo - uses: actions/checkout@v3 + uses: actions/checkout@v3.1.0 with: fetch-depth: 0 - name: Checkout oneDPL - uses: actions/checkout@v3 + 
uses: actions/checkout@v3.1.0 with: repository: oneapi-src/oneDPL path: oneDPL ref: oneDPL-2021.7.0-release - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -132,7 +132,7 @@ jobs: (echo CONDA_BLD=%CONDA_PREFIX%\conda-bld\win-64\) >> %GITHUB_ENV% - name: Cache conda packages - uses: actions/cache@v3 + uses: actions/cache@v3.0.11 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -152,7 +152,7 @@ jobs: DPLROOT: '%GITHUB_WORKSPACE%\oneDPL' - name: Upload artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3.1.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -184,7 +184,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3.0.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -195,7 +195,7 @@ jobs: tar -xvf ${{ env.pkg-path-in-channel }}/${{ env.PACKAGE_NAME }}-*.tar.bz2 -C ${{ env.extracted-pkg-path }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -227,7 +227,7 @@ jobs: TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' - name: Cache conda packages - uses: actions/cache@v3 + uses: actions/cache@v3.0.11 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -284,7 +284,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3.0.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -304,7 +304,7 @@ jobs: dir ${{ env.extracted-pkg-path }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -350,7 +350,7 @@ jobs: run: more lockfile - name: Cache conda packages - uses: actions/cache@v3 + uses: actions/cache@v3.0.11 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -448,12 +448,12 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3.0.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -486,12 +486,12 @@ jobs: python: ['3.8', '3.9'] steps: - name: Download artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3.0.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: auto-update-conda: true python-version: ${{ matrix.python }} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 723347913ac3..56ce09e34765 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -9,6 +9,8 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: pre-commit/action@v2.0.0 + - uses: actions/checkout@v3.1.0 + - uses: actions/setup-python@v4.3.0 + 
with: + python-version: '3.10' + - uses: pre-commit/action@v3.0.0 From a7180391ab90ec8e246f3a3f3860684a7269b792 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 17 Oct 2022 21:26:26 +0200 Subject: [PATCH 27/30] Align normalize_queue_device call with CFD (#1200) * Align normalize_queue_device call with CFD in dpnp * Update dpnp/dpnp_iface.py Co-authored-by: Oleksandr Pavlyk * Update dpnp/dpnp_iface.py Co-authored-by: Oleksandr Pavlyk Co-authored-by: Oleksandr Pavlyk --- dpnp/dpnp_array.py | 7 +++--- dpnp/dpnp_container.py | 34 ++++++-------------------- dpnp/dpnp_iface.py | 53 ++++++++++++++++++++++++++++++++++++++-- tests/test_sycl_queue.py | 23 +++++++++++++++++ 4 files changed, 85 insertions(+), 32 deletions(-) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 05623fc67a0a..2b779f57b142 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -25,10 +25,9 @@ # ***************************************************************************** import dpctl.tensor as dpt -from dpctl.tensor._device import normalize_queue_device -import dpnp import numpy +import dpnp class dpnp_array: """ @@ -64,7 +63,7 @@ def __init__(self, copy=False, order=order) else: - sycl_queue_normalized = normalize_queue_device(sycl_queue=sycl_queue, device=device) + sycl_queue_normalized = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue, device=device) self._array_obj = dpt.usm_ndarray(shape, dtype=dtype, strides=strides, diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index bfc7e469d700..b4b134f9dc54 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -34,28 +34,10 @@ """ -import dpnp.config as config -# from dpnp.dparray import dparray -from dpnp.dpnp_array import dpnp_array - -import numpy - import dpctl.tensor as dpt -from dpctl.tensor._device import normalize_queue_device - -if config.__DPNP_OUTPUT_DPCTL__: - try: - """ - Detect DPCtl availability to use data container - """ - import dpctl.tensor as dpctl - - except ImportError: - """ - No DPCtl data container available - """ - config.__DPNP_OUTPUT_DPCTL__ = 0 +from dpnp.dpnp_array import dpnp_array +import dpnp __all__ = [ @@ -77,14 +59,15 @@ def asarray(x1, else: x1_obj = x1 - sycl_queue_normalized = normalize_queue_device(sycl_queue=sycl_queue, device=device) + sycl_queue_normalized = dpnp.get_normalized_queue_device(x1_obj, sycl_queue=sycl_queue, device=device) + + """Converts incoming 'x1' object to 'dpnp_array'.""" array_obj = dpt.asarray(x1_obj, dtype=dtype, copy=copy, order=order, usm_type=usm_type, sycl_queue=sycl_queue_normalized) - return dpnp_array(array_obj.shape, buffer=array_obj, order=order) @@ -94,13 +77,12 @@ def empty(shape, device=None, usm_type="device", sycl_queue=None): - """Creates `dpnp_array` from uninitialized USM allocation.""" - sycl_queue_normalized = normalize_queue_device(sycl_queue=sycl_queue, device=device) + sycl_queue_normalized = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue, device=device) + """Creates `dpnp_array` from uninitialized USM allocation.""" array_obj = dpt.empty(shape, dtype=dtype, order=order, usm_type=usm_type, sycl_queue=sycl_queue_normalized) - return dpnp_array(array_obj.shape, buffer=array_obj, order=order) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index d0fccd6fcaed..5aa69ced4aec 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -47,6 +47,7 @@ import collections import dpctl +import dpctl.tensor as dpt from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * @@ -62,7 +63,8 @@ "dpnp_queue_initialize", "dpnp_queue_is_cpu", "get_dpnp_descriptor", - "get_include" + "get_include", + "get_normalized_queue_device" ] from dpnp.dpnp_iface_arraycreation import * @@ -248,3 +250,50 @@ def get_include(): dpnp_path = os.path.join(os.path.dirname(__file__), "backend", "include") return dpnp_path + + +def get_normalized_queue_device(obj=None, + device=None, + sycl_queue=None): + """ + Utility to process complementary keyword arguments 'device' and 'sycl_queue' + in subsequent calls of functions from `dpctl.tensor` module. + + If both arguments 'device' and 'sycl_queue' have default value `None` + and 'obj' has `sycl_queue` attribute, it assumes that Compute Follows Data + approach has to be applied and so the resulting SYCL queue will be normalized + based on the queue value from 'obj'. + + Args: + obj (optional): A python object. Can be an instance of `dpnp_array`, + `dpctl.tensor.usm_ndarray`, an object representing SYCL USM allocation + and implementing `__sycl_usm_array_interface__` protocol, + an instance of `numpy.ndarray`, an object supporting Python buffer protocol, + a Python scalar, or a (possibly nested) sequence of Python scalars. 
+ sycl_queue (:class:`dpctl.SyclQueue`, optional): + explicitly indicates where USM allocation is done + and the population code (if any) is executed. + Value `None` is interpreted as get the SYCL queue + from `obj` parameter if not None, from `device` keyword, + or use default queue. + Default: None + device (string, :class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue, + :class:`dpctl.tensor.Device`, optional): + array-API keyword indicating non-partitioned SYCL device + where array is allocated. + Returns + :class:`dpctl.SyclQueue` object normalized by `normalize_queue_device` call + of `dpctl.tensor` module invoked with 'device' and 'sycl_queue' values. + If both incoming 'device' and 'sycl_queue' are None and 'obj' has `sycl_queue` attribute, + the normalization will be performed for 'obj.sycl_queue' value. + Raises: + TypeError: if argument is not of the expected type, or keywords + imply incompatible queues. + """ + if device is None and sycl_queue is None and obj is not None and hasattr(obj, 'sycl_queue'): + sycl_queue = obj.sycl_queue + + # TODO: remove check dpt._device has attribute 'normalize_queue_device' + if hasattr(dpt._device, 'normalize_queue_device'): + return dpt._device.normalize_queue_device(sycl_queue=sycl_queue, device=device) + return sycl_queue diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index b858db48a1fa..1f625d408014 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -319,3 +319,26 @@ def test_to_device(device_from, device_to): y = x.to_device(device_to) assert y.get_array().sycl_device == device_to + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +@pytest.mark.parametrize("func", + ["array", "asarray"]) +@pytest.mark.parametrize("device_param", + ["", "None", "sycl_device"], + ids=['Empty', 'None', "device"]) +@pytest.mark.parametrize("queue_param", + ["", "None", "sycl_queue"], + ids=['Empty', 'None', "queue"]) +def test_array_copy(device, func, device_param, queue_param): + data = numpy.ones(100) + dpnp_data = getattr(dpnp, func)(data, device=device) + + kwargs_items = {'device': device_param, 'sycl_queue': queue_param}.items() + kwargs = {k: getattr(dpnp_data, v, None) for k,v in kwargs_items if v != ""} + + result = dpnp.array(dpnp_data, **kwargs) + + assert_sycl_queue_equal(result.sycl_queue, dpnp_data.sycl_queue) From 35e28a4ddb6c6d96290fa72ec49758628431f5f7 Mon Sep 17 00:00:00 2001 From: Lukicheva Polina <63358667+LukichevaPolina@users.noreply.github.com> Date: Tue, 18 Oct 2022 01:48:12 +0300 Subject: [PATCH 28/30] Enable Compute Follows Data in Cython in fft and linalg (#1132) --- .github/workflows/conda-package.yml | 8 +- dpnp/backend/kernels/dpnp_krnl_common.cpp | 23 +- dpnp/backend/kernels/dpnp_krnl_fft.cpp | 179 ++++++++------ dpnp/backend/kernels/dpnp_krnl_linalg.cpp | 57 ++++- dpnp/dpnp_algo/dpnp_algo.pxd | 5 +- dpnp/fft/dpnp_algo_fft.pyx | 71 +++++- dpnp/fft/dpnp_iface_fft.py | 40 +-- dpnp/linalg/dpnp_algo_linalg.pyx | 274 ++++++++++++++++++--- dpnp/linalg/dpnp_iface_linalg.py | 20 +- tests/skipped_tests.tbl | 169 +++++++++---- tests/skipped_tests_gpu.tbl | 108 ++------ tests/test_fft.py | 123 +++++----- tests/test_linalg.py | 69 ++++-- tests/test_sycl_queue.py | 286 +++++++++++++++++++++- 14 files changed, 1042 insertions(+), 390 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 39867a86bb13..4553b7c49059 100644 --- a/.github/workflows/conda-package.yml +++ 
b/.github/workflows/conda-package.yml @@ -248,12 +248,14 @@ jobs: - name: Smoke test run: python -c "import dpnp, dpctl; dpctl.lsplatform()" + env: + OCL_ICD_FILENAMES: 'libintelocl.so' # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests - run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_mathematical.py test_special.py + run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_fft.py test_linalg.py test_mathematical.py test_special.py env: - SYCL_ENABLE_HOST_DEVICE: '1' + OCL_ICD_FILENAMES: 'libintelocl.so' working-directory: ${{ env.tests-path }} test_windows: @@ -426,7 +428,7 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests - run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_mathematical.py test_special.py + run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_fft.py test_linalg.py test_mathematical.py test_special.py working-directory: ${{ env.tests-path }} upload_linux: diff --git a/dpnp/backend/kernels/dpnp_krnl_common.cpp b/dpnp/backend/kernels/dpnp_krnl_common.cpp index 7ae9127041a0..541b34d4fbc4 100644 --- a/dpnp/backend/kernels/dpnp_krnl_common.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_common.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2022, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -97,6 +97,7 @@ void dpnp_astype_c(const void* array1_in, void* result1, const size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -477,6 +478,7 @@ void dpnp_dot_c(void* result_out, input2_strides, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -614,6 +616,7 @@ void dpnp_eig_c(const void* array_in, void* result1, void* result2, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -707,6 +710,7 @@ void dpnp_eigvals_c(const void* array_in, void* result1, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -774,7 +778,7 @@ void dpnp_initval_c(void* result1, void* value, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); - + DPCTLEvent_Delete(event_ref); } template @@ -941,6 +945,7 @@ void dpnp_matmul_c(void* result_out, input2_strides, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1112,11 +1117,25 @@ void func_map_init_linalg(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_EIG][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eig_default_c}; fmap[DPNPFuncName::DPNP_FN_EIG][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eig_default_c}; + fmap[DPNPFuncName::DPNP_FN_EIG_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_eig_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIG_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_eig_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIG_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eig_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIG_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eig_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_eigvals_default_c}; fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_LNG][eft_LNG] = 
{eft_DBL, (void*)dpnp_eigvals_default_c}; fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eigvals_default_c}; fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eigvals_default_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS_EXT][eft_INT][eft_INT] = {eft_DBL, + (void*)dpnp_eigvals_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS_EXT][eft_LNG][eft_LNG] = {eft_DBL, + (void*)dpnp_eigvals_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS_EXT][eft_FLT][eft_FLT] = {eft_FLT, + (void*)dpnp_eigvals_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS_EXT][eft_DBL][eft_DBL] = {eft_DBL, + (void*)dpnp_eigvals_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_initval_default_c}; fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_initval_default_c}; fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_initval_default_c}; diff --git a/dpnp/backend/kernels/dpnp_krnl_fft.cpp b/dpnp/backend/kernels/dpnp_krnl_fft.cpp index 23f612012720..3d39f2f373c7 100644 --- a/dpnp/backend/kernels/dpnp_krnl_fft.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_fft.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2022, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -48,17 +48,17 @@ template -void dpnp_fft_fft_sycl_c(DPCTLSyclQueueRef q_ref, - const void* array1_in, - void* result_out, - const shape_elem_type* input_shape, - const shape_elem_type* output_shape, - size_t shape_size, - const size_t result_size, - const size_t input_size, - long axis, - long input_boundarie, - size_t inverse) +static void dpnp_fft_fft_sycl_c(DPCTLSyclQueueRef q_ref, + const void* array1_in, + void* result_out, + const shape_elem_type* input_shape, + const shape_elem_type* output_shape, + size_t shape_size, + const size_t result_size, + const size_t input_size, + long axis, + long input_boundarie, + size_t inverse) { if (!(input_size && result_size && shape_size)) { @@ -71,9 +71,8 @@ void dpnp_fft_fft_sycl_c(DPCTLSyclQueueRef q_ref, sycl::queue queue = *(reinterpret_cast(q_ref)); - DPNPC_ptr_adapter<_DataType_input> input1_ptr(q_ref, array1_in, input_size); - const _DataType_input* array_1 = input1_ptr.get_ptr(); - _DataType_output* result = reinterpret_cast<_DataType_output*>(result_out); + _DataType_input* array_1 = static_cast<_DataType_input *>(const_cast(array1_in)); + _DataType_output* result = static_cast<_DataType_output *>(result_out); // kernel specific temporal data shape_elem_type* output_shape_offsets = @@ -171,29 +170,28 @@ void dpnp_fft_fft_sycl_c(DPCTLSyclQueueRef q_ref, } template -void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref, - const void* array1_in, - void* result_out, - const shape_elem_type* input_shape, - const shape_elem_type*, - const size_t shape_size, - const size_t input_size, - const size_t result_size, - _Descriptor_type& desc, - size_t inverse, - const size_t norm) +static void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref, + const void* array1_in, + void* result_out, + const shape_elem_type* input_shape, + const shape_elem_type* result_shape, + const size_t shape_size, + const size_t input_size, + const size_t result_size, + _Descriptor_type& desc, + size_t inverse, + const size_t norm) { - if (!shape_size) - { + (void)result_shape; + + if (!shape_size) { return; } sycl::queue 
queue = *(reinterpret_cast(q_ref)); - DPNPC_ptr_adapter<_DataType_input> input1_ptr(q_ref, array1_in, input_size); - DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result_out, result_size); - _DataType_input* array_1 = input1_ptr.get_ptr(); - _DataType_output* result = result_ptr.get_ptr(); + _DataType_input* array_1 = static_cast<_DataType_input *>(const_cast(array1_in)); + _DataType_output* result = static_cast<_DataType_output *>(result_out); const size_t n_iter = std::accumulate(input_shape, input_shape + shape_size - 1, 1, std::multiplies()); @@ -242,31 +240,29 @@ template -void dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, - const void* array1_in, - void* result_out, - const shape_elem_type* input_shape, - const shape_elem_type* result_shape, - const size_t shape_size, - const size_t input_size, - const size_t result_size, - _Descriptor_type& desc, - size_t inverse, - const size_t norm, - const size_t real) +static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, + const void* array1_in, + void* result_out, + const shape_elem_type* input_shape, + const shape_elem_type* result_shape, + const size_t shape_size, + const size_t input_size, + const size_t result_size, + _Descriptor_type& desc, + size_t inverse, + const size_t norm, + const size_t real) { - if (!shape_size) - { - return; + DPCTLSyclEventRef event_ref = nullptr; + if (!shape_size) { + return event_ref; } - DPNPC_ptr_adapter<_DataType_input> input1_ptr(q_ref, array1_in, input_size); - DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result_out, result_size * 2, true, true); - _DataType_input* array_1 = input1_ptr.get_ptr(); - _DataType_output* result = result_ptr.get_ptr(); - sycl::queue queue = *(reinterpret_cast(q_ref)); + _DataType_input* array_1 = static_cast<_DataType_input *>(const_cast(array1_in)); + _DataType_output* result = static_cast<_DataType_output *>(result_out); + const size_t n_iter = std::accumulate(input_shape, input_shape + shape_size - 1, 1, std::multiplies()); @@ -308,7 +304,7 @@ void dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, sycl::event::wait(fft_events); if (real) { // the output size of the rfft function is input_size/2 + 1 so we don't need to fill the second half of the output - return; + return event_ref; } size_t n_conj = result_shift % 2 == 0 ? 
result_shift / 2 - 1 : result_shift / 2; @@ -322,7 +318,8 @@ void dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, { size_t j = global_id[1]; { - *(reinterpret_cast*>(result) + result_shift * (i + 1) - (j + 1)) = std::conj(*(reinterpret_cast*>(result) + result_shift * i + (j + 1))); + *(reinterpret_cast*>(result) + result_shift * (i + 1) - (j + 1)) = + std::conj(*(reinterpret_cast*>(result) + result_shift * i + (j + 1))); } } }; @@ -333,14 +330,18 @@ void dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, }; event = queue.submit(kernel_func); - event.wait(); if (inverse) { - event = oneapi::mkl::vm::conj(queue, result_size, reinterpret_cast*>(result), reinterpret_cast*>(result)); event.wait(); + event = oneapi::mkl::vm::conj(queue, + result_size, + reinterpret_cast*>(result), + reinterpret_cast*>(result)); } - return; + event_ref = reinterpret_cast(&event); + + return DPCTLEvent_Copy(event_ref); } template @@ -394,7 +395,7 @@ DPCTLSyclEventRef dpnp_fft_fft_c(DPCTLSyclQueueRef q_ref, { desc_dp_real_t desc(dim); - dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>( q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 0); } /* real-to-complex, single precision */ @@ -402,26 +403,33 @@ DPCTLSyclEventRef dpnp_fft_fft_c(DPCTLSyclQueueRef q_ref, std::is_same<_DataType_output, std::complex>::value) { desc_sp_real_t desc(dim); // try: 2 * result_size - dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>( + + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>( q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 0); } else if constexpr (std::is_same<_DataType_input, int32_t>::value || std::is_same<_DataType_input, int64_t>::value) { - double* array1_copy = reinterpret_cast(dpnp_memory_alloc_c(input_size * sizeof(double))); + double* array1_copy = reinterpret_cast(dpnp_memory_alloc_c(q_ref, input_size * sizeof(double))); shape_elem_type* copy_strides = reinterpret_cast(dpnp_memory_alloc_c(q_ref, sizeof(shape_elem_type))); *copy_strides = 1; shape_elem_type* copy_shape = reinterpret_cast(dpnp_memory_alloc_c(q_ref, sizeof(shape_elem_type))); *copy_shape = input_size; shape_elem_type copy_shape_size = 1; - dpnp_copyto_c<_DataType_input, double>(q_ref, array1_copy, input_size, copy_shape_size, copy_shape, copy_strides, - array1_in, input_size, copy_shape_size, copy_shape, copy_strides, NULL, dep_event_vec_ref); + event_ref = dpnp_copyto_c<_DataType_input, double>(q_ref, array1_copy, input_size, copy_shape_size, copy_shape, copy_strides, + array1_in, input_size, copy_shape_size, copy_shape, copy_strides, NULL, dep_event_vec_ref); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); desc_dp_real_t desc(dim); - dpnp_fft_fft_mathlib_real_to_cmplx_c( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c( q_ref, array1_copy, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 0); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); + event_ref = nullptr; + dpnp_memory_free_c(q_ref, array1_copy); dpnp_memory_free_c(q_ref, copy_strides); dpnp_memory_free_c(q_ref, copy_shape); @@ -470,6 +478,7 @@ void dpnp_fft_fft_c(const void* array1_in, norm, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + 
DPCTLEvent_Delete(event_ref); } template @@ -524,7 +533,6 @@ DPCTLSyclEventRef dpnp_fft_rfft_c(DPCTLSyclQueueRef q_ref, size_t dim = input_shape[shape_size - 1]; - if constexpr (std::is_same<_DataType_output, std::complex>::value || std::is_same<_DataType_output, std::complex>::value) { @@ -533,7 +541,7 @@ DPCTLSyclEventRef dpnp_fft_rfft_c(DPCTLSyclQueueRef q_ref, { desc_dp_real_t desc(dim); - dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>( q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 1); } /* real-to-complex, single precision */ @@ -541,26 +549,32 @@ DPCTLSyclEventRef dpnp_fft_rfft_c(DPCTLSyclQueueRef q_ref, std::is_same<_DataType_output, std::complex>::value) { desc_sp_real_t desc(dim); // try: 2 * result_size - dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>( q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 1); } else if constexpr (std::is_same<_DataType_input, int32_t>::value || std::is_same<_DataType_input, int64_t>::value) { - double* array1_copy = reinterpret_cast(dpnp_memory_alloc_c(input_size * sizeof(double))); + double* array1_copy = reinterpret_cast(dpnp_memory_alloc_c(q_ref, input_size * sizeof(double))); shape_elem_type* copy_strides = reinterpret_cast(dpnp_memory_alloc_c(q_ref, sizeof(shape_elem_type))); *copy_strides = 1; shape_elem_type* copy_shape = reinterpret_cast(dpnp_memory_alloc_c(q_ref, sizeof(shape_elem_type))); *copy_shape = input_size; shape_elem_type copy_shape_size = 1; - dpnp_copyto_c<_DataType_input, double>(q_ref, array1_copy, input_size, copy_shape_size, copy_shape, copy_strides, - array1_in, input_size, copy_shape_size, copy_shape, copy_strides, NULL, dep_event_vec_ref); + event_ref = dpnp_copyto_c<_DataType_input, double>(q_ref, array1_copy, input_size, copy_shape_size, copy_shape, copy_strides, + array1_in, input_size, copy_shape_size, copy_shape, copy_strides, NULL, dep_event_vec_ref); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); desc_dp_real_t desc(dim); - dpnp_fft_fft_mathlib_real_to_cmplx_c( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c( q_ref, array1_copy, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 1); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); + event_ref = nullptr; + dpnp_memory_free_c(q_ref, array1_copy); dpnp_memory_free_c(q_ref, copy_strides); dpnp_memory_free_c(q_ref, copy_shape); @@ -596,6 +610,7 @@ void dpnp_fft_rfft_c(const void* array1_in, norm, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -636,6 +651,20 @@ void func_map_init_fft_func(func_map_t& fmap) eft_C64, (void*)dpnp_fft_fft_default_c, std::complex>}; fmap[DPNPFuncName::DPNP_FN_FFT_FFT][eft_C128][eft_C128] = { eft_C128, (void*)dpnp_fft_fft_default_c, std::complex>}; + + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_INT][eft_INT] = { + eft_C128, (void*)dpnp_fft_fft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_LNG][eft_LNG] = { + eft_C128, (void*)dpnp_fft_fft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_FLT][eft_FLT] = { + eft_C64, (void*)dpnp_fft_fft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_DBL][eft_DBL] = { + 
eft_C128, (void*)dpnp_fft_fft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_C64][eft_C64] = { + eft_C64, (void*)dpnp_fft_fft_ext_c, std::complex>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_C128][eft_C128] = { + eft_C128, (void*)dpnp_fft_fft_ext_c, std::complex>}; + fmap[DPNPFuncName::DPNP_FN_FFT_RFFT][eft_INT][eft_INT] = { eft_C128, (void*)dpnp_fft_rfft_default_c>}; fmap[DPNPFuncName::DPNP_FN_FFT_RFFT][eft_LNG][eft_LNG] = { @@ -644,5 +673,15 @@ void func_map_init_fft_func(func_map_t& fmap) eft_C64, (void*)dpnp_fft_rfft_default_c>}; fmap[DPNPFuncName::DPNP_FN_FFT_RFFT][eft_DBL][eft_DBL] = { eft_C128, (void*)dpnp_fft_rfft_default_c>}; + + fmap[DPNPFuncName::DPNP_FN_FFT_RFFT_EXT][eft_INT][eft_INT] = { + eft_C128, (void*)dpnp_fft_rfft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_RFFT_EXT][eft_LNG][eft_LNG] = { + eft_C128, (void*)dpnp_fft_rfft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_RFFT_EXT][eft_FLT][eft_FLT] = { + eft_C64, (void*)dpnp_fft_rfft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_RFFT_EXT][eft_DBL][eft_DBL] = { + eft_C128, (void*)dpnp_fft_rfft_ext_c>}; + return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp index 77bdad0c6b1c..dff1320d5c24 100644 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2022, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -122,6 +122,7 @@ void dpnp_cholesky_c(void* array1_in, void* result1, const size_t size, const si data_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -263,6 +264,7 @@ void dpnp_det_c(void* array1_in, void* result1, shape_elem_type* shape, size_t n ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -405,6 +407,7 @@ void dpnp_inv_c(void* array1_in, void* result1, shape_elem_type* shape, size_t n ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -528,6 +531,7 @@ void dpnp_kron_c(void* array1_in, ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -616,6 +620,7 @@ void dpnp_matrix_rank_c(void* array1_in, void* result1, shape_elem_type* shape, ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -753,6 +758,7 @@ void dpnp_qr_c(void* array1_in, void* result1, void* result2, void* result3, siz size_n, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -852,6 +858,7 @@ void dpnp_svd_c(void* array1_in, void* result1, void* result2, void* result3, si size_n, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -872,16 +879,29 @@ void func_map_init_linalg_func(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_CHOLESKY][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cholesky_default_c}; fmap[DPNPFuncName::DPNP_FN_CHOLESKY][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cholesky_default_c}; + fmap[DPNPFuncName::DPNP_FN_CHOLESKY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cholesky_ext_c}; + fmap[DPNPFuncName::DPNP_FN_CHOLESKY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cholesky_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DET][eft_INT][eft_INT] = 
{eft_INT, (void*)dpnp_det_default_c}; fmap[DPNPFuncName::DPNP_FN_DET][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_det_default_c}; fmap[DPNPFuncName::DPNP_FN_DET][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_det_default_c}; fmap[DPNPFuncName::DPNP_FN_DET][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_det_default_c}; + fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_det_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_det_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_det_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_det_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INV][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_inv_default_c}; fmap[DPNPFuncName::DPNP_FN_INV][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_inv_default_c}; fmap[DPNPFuncName::DPNP_FN_INV][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_inv_default_c}; fmap[DPNPFuncName::DPNP_FN_INV][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_inv_default_c}; + fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_inv_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_inv_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_inv_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_inv_ext_c}; + fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_kron_default_c}; fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_LNG] = {eft_LNG, @@ -989,6 +1009,11 @@ void func_map_init_linalg_func(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_matrix_rank_default_c}; fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_matrix_rank_default_c}; + fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_matrix_rank_ext_c}; + fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_matrix_rank_ext_c}; + fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_matrix_rank_ext_c}; + fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_matrix_rank_ext_c}; + fmap[DPNPFuncName::DPNP_FN_QR][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_qr_default_c}; fmap[DPNPFuncName::DPNP_FN_QR][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_qr_default_c}; fmap[DPNPFuncName::DPNP_FN_QR][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_qr_default_c}; @@ -996,12 +1021,34 @@ void func_map_init_linalg_func(func_map_t& fmap) // fmap[DPNPFuncName::DPNP_FN_QR][eft_C128][eft_C128] = { // eft_C128, (void*)dpnp_qr_c, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_svd_default_c}; + fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_qr_ext_c}; + fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_qr_ext_c}; + fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_qr_ext_c}; + fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_qr_ext_c}; + // fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_C128][eft_C128] = { + // eft_C128, (void*)dpnp_qr_c, 
std::complex>}; + + fmap[DPNPFuncName::DPNP_FN_SVD][eft_INT][eft_INT] = {eft_DBL, + (void*)dpnp_svd_default_c}; + fmap[DPNPFuncName::DPNP_FN_SVD][eft_LNG][eft_LNG] = {eft_DBL, + (void*)dpnp_svd_default_c}; + fmap[DPNPFuncName::DPNP_FN_SVD][eft_FLT][eft_FLT] = {eft_FLT, + (void*)dpnp_svd_default_c}; + fmap[DPNPFuncName::DPNP_FN_SVD][eft_DBL][eft_DBL] = {eft_DBL, + (void*)dpnp_svd_default_c}; fmap[DPNPFuncName::DPNP_FN_SVD][eft_C128][eft_C128] = { eft_C128, (void*)dpnp_svd_default_c, std::complex, double>}; + + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_INT][eft_INT] = {eft_DBL, + (void*)dpnp_svd_ext_c}; + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_LNG][eft_LNG] = {eft_DBL, + (void*)dpnp_svd_ext_c}; + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_FLT][eft_FLT] = {eft_FLT, + (void*)dpnp_svd_ext_c}; + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_DBL][eft_DBL] = {eft_DBL, + (void*)dpnp_svd_ext_c}; + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_C128][eft_C128] = { + eft_C128, (void*)dpnp_svd_ext_c, std::complex, double>}; return; } diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 2d3f1a7870a9..e604a71f4492 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -26,8 +26,6 @@ # ***************************************************************************** cimport dpctl as c_dpctl -cimport dpctl as c_dpctl - from libcpp cimport bool as cpp_bool from dpnp.dpnp_utils.dpnp_algo_utils cimport dpnp_descriptor @@ -145,6 +143,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_FFT_FFT DPNP_FN_FFT_FFT_EXT DPNP_FN_FFT_RFFT + DPNP_FN_FFT_RFFT_EXT DPNP_FN_FILL_DIAGONAL DPNP_FN_FILL_DIAGONAL_EXT DPNP_FN_FLATTEN diff --git a/dpnp/fft/dpnp_algo_fft.pyx b/dpnp/fft/dpnp_algo_fft.pyx index d63c7bf9fc68..393c744d4f36 100644 --- a/dpnp/fft/dpnp_algo_fft.pyx +++ b/dpnp/fft/dpnp_algo_fft.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -42,8 +42,9 @@ __all__ = [ "dpnp_rfft" ] -ctypedef void(*fptr_dpnp_fft_fft_t)(void *, void * , shape_elem_type * , shape_elem_type * , - size_t, long, long, size_t, size_t) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_fft_fft_t)(c_dpctl.DPCTLSyclQueueRef, void *, void * , + shape_elem_type * , shape_elem_type * , size_t, long, + long, size_t, size_t, const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_fft(utils.dpnp_descriptor input, @@ -63,15 +64,39 @@ cpdef utils.dpnp_descriptor dpnp_fft(utils.dpnp_descriptor input, cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FFT_FFT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FFT_FFT_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_dpnp_fft_fft_t func = kernel_data.ptr # call FPTR function - func(input.get_data(), result.get_data(), input_shape.data(), - output_shape.data(), input_shape.size(), axis_norm, input_boundarie, inverse, norm) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + output_shape.data(), + input_shape.size(), + axis_norm, + input_boundarie, + inverse, + norm, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -93,14 +118,38 @@ cpdef utils.dpnp_descriptor dpnp_rfft(utils.dpnp_descriptor input, cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FFT_RFFT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FFT_RFFT_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_dpnp_fft_fft_t func = kernel_data.ptr # call FPTR function - func(input.get_data(), result.get_data(), input_shape.data(), - output_shape.data(), input_shape.size(), axis_norm, input_boundarie, inverse, norm) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + output_shape.data(), + input_shape.size(), + axis_norm, + input_boundarie, + inverse, + norm, + NULL) # dep_events_ref + + with nogil: 
c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/fft/dpnp_iface_fft.py b/dpnp/fft/dpnp_iface_fft.py index 952a9c72a8b6..986dfaa8c619 100644 --- a/dpnp/fft/dpnp_iface_fft.py +++ b/dpnp/fft/dpnp_iface_fft.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -100,7 +100,7 @@ def fft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: norm_ = get_validated_norm(norm) @@ -144,7 +144,7 @@ def fft2(x1, s=None, axes=(-2, -1), norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if norm is not None: pass @@ -185,7 +185,7 @@ def fftn(x1, s=None, axes=None, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if s is None: boundaries = tuple([x1_desc.shape[i] for i in range(x1_desc.ndim)]) @@ -231,7 +231,7 @@ def fftshift(x1, axes=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_= Norm.backward @@ -263,7 +263,7 @@ def hfft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_ = get_validated_norm(norm) @@ -305,7 +305,7 @@ def ifft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: norm_ = get_validated_norm(norm) @@ -348,7 +348,7 @@ def ifft2(x1, s=None, axes=(-2, -1), norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if norm is not None: pass @@ -372,7 +372,7 @@ def ifftshift(x1, axes=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_ = Norm.backward @@ -385,7 +385,7 @@ def ifftshift(x1, axes=None): if x1_desc.size < 1: pass # let fallback to handle exception else: - return dpnp_fft(x1_desc, input_boundarie, output_boundarie, axis_param, False, norm_.value).get_pyobj() + return dpnp_fft(x1_desc, input_boundarie, output_boundarie, axis_param, True, norm_.value).get_pyobj() return call_origin(numpy.fft.ifftshift, x1, axes) @@ -406,7 +406,7 @@ def ifftn(x1, s=None, axes=None, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: if s is None: boundaries = tuple([x1_desc.shape[i] for i in range(x1_desc.ndim)]) @@ -453,7 +453,7 @@ def ihfft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_ = get_validated_norm(norm) @@ -478,7 +478,7 @@ def ihfft(x1, n=None, axis=-1, norm=None): else: output_boundarie = input_boundarie - return dpnp_fft(x1_desc, input_boundarie, output_boundarie, axis_param, False, norm_.value).get_pyobj() + 
return dpnp_fft(x1_desc, input_boundarie, output_boundarie, axis_param, True, norm_.value).get_pyobj() return call_origin(numpy.fft.ihfft, x1, n, axis, norm) @@ -497,7 +497,7 @@ def irfft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_ = get_validated_norm(norm) @@ -548,7 +548,7 @@ def irfft2(x1, s=None, axes=(-2, -1), norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if norm is not None: pass @@ -574,7 +574,7 @@ def irfftn(x1, s=None, axes=None, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: if s is None: boundaries = tuple([x1_desc.shape[i] for i in range(x1_desc.ndim)]) @@ -621,7 +621,7 @@ def rfft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: norm_ = get_validated_norm(norm) @@ -670,7 +670,7 @@ def rfft2(x1, s=None, axes=(-2, -1), norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if norm is not None: pass @@ -711,7 +711,7 @@ def rfftn(x1, s=None, axes=None, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: if s is None: boundaries = tuple([x1_desc.shape[i] for i in range(x1_desc.ndim)]) @@ -738,7 +738,7 @@ def rfftn(x1, s=None, axes=None, norm=None): except IndexError: checker_throw_axis_error("fft.rfftn", "is out of bounds", param_axis, f"< {len(boundaries)}") - x1_iter_desc = dpnp.get_dpnp_descriptor(x1_iter) + x1_iter_desc = dpnp.get_dpnp_descriptor(x1_iter, copy_when_nondefault_queue=False) x1_iter = rfft(x1_iter_desc.get_pyobj(), n=param_n, axis=param_axis, norm=norm) return x1_iter diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx index 04efad5c600f..e6b239eb880b 100644 --- a/dpnp/linalg/dpnp_algo_linalg.pyx +++ b/dpnp/linalg/dpnp_algo_linalg.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -56,12 +56,24 @@ __all__ = [ # C function pointer to the C library template functions -ctypedef void(*custom_linalg_1in_1out_func_ptr_t)(void *, void * , shape_elem_type * , size_t) -ctypedef void(*custom_linalg_1in_1out_func_ptr_t_)(void * , void * , size_t * ) -ctypedef void(*custom_linalg_1in_1out_with_size_func_ptr_t_)(void *, void * , size_t) -ctypedef void(*custom_linalg_1in_1out_with_2size_func_ptr_t_)(void *, void * , size_t, size_t) -ctypedef void(*custom_linalg_1in_3out_shape_t)(void *, void * , void * , void * , size_t , size_t ) -ctypedef void(*custom_linalg_2in_1out_func_ptr_t)(void *, void * , void * , size_t ) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * ,shape_elem_type * , + size_t, const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef, + void * , void * , size_t * , + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_with_size_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_with_2size_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , size_t, size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_3out_shape_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , void * , void * , + size_t , size_t, const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_2in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , void * , size_t, + const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_cholesky(utils.dpnp_descriptor input_): @@ -69,14 +81,34 @@ cpdef utils.dpnp_descriptor dpnp_cholesky(utils.dpnp_descriptor input_): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input_.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOLESKY, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOLESKY_EXT, param1_type, param1_type) + + input_obj = input_.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_.shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_.shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_with_2size_func_ptr_t_ func = kernel_data.ptr - func(input_.get_data(), result.get_data(), input_.size, size_) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input_.get_data(), + result.get_data(), + input_.size, + size_, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -119,14 +151,34 @@ cpdef utils.dpnp_descriptor dpnp_det(utils.dpnp_descriptor input): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DET, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DET_EXT, param1_type, param1_type) + + 
input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_func_ptr_t func = kernel_data.ptr - func(input.get_data(), result.get_data(), input_shape.data(), input.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + input.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -137,16 +189,41 @@ cpdef tuple dpnp_eig(utils.dpnp_descriptor x1): cdef size_t size = 0 if x1_shape.empty() else x1_shape.front() cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EIG, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EIG_EXT, param1_type, param1_type) result_type = dpnp_DPNPFuncType_to_dtype(< size_t > kernel_data.return_type) - cdef utils.dpnp_descriptor res_val = utils.create_output_descriptor((size,), kernel_data.return_type, None) - cdef utils.dpnp_descriptor res_vec = utils.create_output_descriptor(x1_shape, kernel_data.return_type, None) + x1_obj = x1.get_array() + + cdef utils.dpnp_descriptor res_val = utils.create_output_descriptor((size,), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor res_vec = utils.create_output_descriptor(x1_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = res_val.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_2in_1out_func_ptr_t func = kernel_data.ptr # call FPTR function - func(x1.get_data(), res_val.get_data(), res_vec.get_data(), size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + res_val.get_data(), + res_vec.get_data(), + size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return (res_val.get_pyobj(), res_vec.get_pyobj()) @@ -157,14 +234,33 @@ cpdef utils.dpnp_descriptor dpnp_eigvals(utils.dpnp_descriptor input): cdef size_t size = 0 if input_shape.empty() else input_shape.front() cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EIGVALS, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EIGVALS_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor res_val = utils.create_output_descriptor((size,), kernel_data.return_type, None) + cdef utils.dpnp_descriptor res_val = utils.create_output_descriptor((size,), + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + 
sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = res_val.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_with_size_func_ptr_t_ func = kernel_data.ptr # call FPTR function - func(input.get_data(), res_val.get_data(), size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + res_val.get_data(), + size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return res_val @@ -174,14 +270,34 @@ cpdef utils.dpnp_descriptor dpnp_inv(utils.dpnp_descriptor input): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_INV, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_INV_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_func_ptr_t func = kernel_data.ptr - func(input.get_data(), result.get_data(), input_shape.data(), input.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + input.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -190,14 +306,34 @@ cpdef utils.dpnp_descriptor dpnp_matrix_rank(utils.dpnp_descriptor input): cdef shape_type_c input_shape = input.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MATRIX_RANK, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MATRIX_RANK_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_func_ptr_t func = kernel_data.ptr - func(input.get_data(), result.get_data(), input_shape.data(), input.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + input.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -312,15 +448,47 @@ cpdef tuple dpnp_qr(utils.dpnp_descriptor x1, str mode): cdef size_t size_tau = min_m_n cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = 
get_dpnp_function_ptr(DPNP_FN_QR, param1_type, param1_type) - - cdef utils.dpnp_descriptor res_q = utils.create_output_descriptor((size_m, min_m_n), kernel_data.return_type, None) - cdef utils.dpnp_descriptor res_r = utils.create_output_descriptor((min_m_n, size_n), kernel_data.return_type, None) - cdef utils.dpnp_descriptor tau = utils.create_output_descriptor((size_tau, ), kernel_data.return_type, None) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_QR_EXT, param1_type, param1_type) + + x1_obj = x1.get_array() + + cdef utils.dpnp_descriptor res_q = utils.create_output_descriptor((size_m, min_m_n), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor res_r = utils.create_output_descriptor((min_m_n, size_n), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor tau = utils.create_output_descriptor((size_tau, ), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = res_q.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_3out_shape_t func = < custom_linalg_1in_3out_shape_t > kernel_data.ptr - func(x1.get_data(), res_q.get_data(), res_r.get_data(), tau.get_data(), size_m, size_n) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + res_q.get_data(), + res_r.get_data(), + tau.get_data(), + size_m, + size_n, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return (res_q.get_pyobj(), res_r.get_pyobj()) @@ -331,18 +499,50 @@ cpdef tuple dpnp_svd(utils.dpnp_descriptor x1, cpp_bool full_matrices, cpp_bool cdef size_t size_s = min(size_m, size_n) cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SVD, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SVD_EXT, param1_type, param1_type) cdef DPNPFuncType type_s = DPNP_FT_DOUBLE if x1.dtype == dpnp.float32: type_s = DPNP_FT_FLOAT - cdef utils.dpnp_descriptor res_u = utils.create_output_descriptor((size_m, size_m), kernel_data.return_type, None) - cdef utils.dpnp_descriptor res_s = utils.create_output_descriptor((size_s, ), type_s, None) - cdef utils.dpnp_descriptor res_vt = utils.create_output_descriptor((size_n, size_n), kernel_data.return_type, None) + x1_obj = x1.get_array() + + cdef utils.dpnp_descriptor res_u = utils.create_output_descriptor((size_m, size_m), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor res_s = utils.create_output_descriptor((size_s, ), + type_s, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor res_vt = utils.create_output_descriptor((size_n, size_n), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = res_u.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_3out_shape_t func = < custom_linalg_1in_3out_shape_t > kernel_data.ptr - 
func(x1.get_data(), res_u.get_data(), res_s.get_data(), res_vt.get_data(), size_m, size_n) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + res_u.get_data(), + res_s.get_data(), + res_vt.get_data(), + size_m, + size_n, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return (res_u.get_pyobj(), res_s.get_pyobj(), res_vt.get_pyobj()) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 2db35c5d2ace..43a26c1b5306 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -88,14 +88,14 @@ def cholesky(input): matrix object if `input` is a matrix object. """ - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.shape[-1] != x1_desc.shape[-2]: pass else: if input.dtype == dpnp.int32 or input.dtype == dpnp.int64: # TODO memory copy. needs to move into DPNPC - input_ = dpnp.get_dpnp_descriptor(dpnp.astype(input, dpnp.float64)) + input_ = dpnp.get_dpnp_descriptor(dpnp.astype(input, dpnp.float64), copy_when_nondefault_queue=False) else: input_ = x1_desc return dpnp_cholesky(input_).get_pyobj() @@ -145,7 +145,7 @@ def det(input): Determinant of `input`. """ - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.shape[-1] == x1_desc.shape[-2]: result_obj = dpnp_det(x1_desc).get_pyobj() @@ -164,7 +164,7 @@ def eig(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if (x1_desc.size > 0): return dpnp_eig(x1_desc) @@ -191,7 +191,7 @@ def eigvals(input): real for real matrices. """ - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.size > 0: return dpnp_eigvals(x1_desc).get_pyobj() @@ -213,7 +213,7 @@ def inv(input): Otherwise the function will be executed sequentially on CPU. """ - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.ndim == 2 and x1_desc.shape[0] == x1_desc.shape[1] and x1_desc.shape[0] >= 2: return dpnp_inv(x1_desc).get_pyobj() @@ -277,7 +277,7 @@ def matrix_rank(input, tol=None, hermitian=False): """ - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if tol is not None: pass @@ -362,7 +362,7 @@ def norm(x1, ord=None, axis=None, keepdims=False): Norm of the matrix or vector(s). 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not isinstance(axis, int) and not isinstance(axis, tuple) and axis is not None: pass @@ -395,7 +395,7 @@ def qr(x1, mode='reduced'): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if mode != 'reduced': pass @@ -464,7 +464,7 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not x1_desc.ndim == 2: pass diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index b781e3772021..424c84158f0a 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -5,60 +5,29 @@ tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25 tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22] tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-remainder-data15-data25] +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_18_{axes=None, norm=None, s=None, shape=(0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_18_{axes=None, norm=None, s=None, shape=(0, 5)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_19_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_19_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_20_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_20_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, 
s=(1, 4, None), shape=(2, 3, 4)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_ifftn + +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_17_{axes=(), norm='ortho', s=None, shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_17_{axes=(), norm='ortho', s=None, shape=(2, 3, 4)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_18_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_18_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_ifftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_21_{axes=None, norm=None, s=None, shape=(0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_21_{axes=None, norm=None, s=None, shape=(0, 5)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_22_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_22_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_23_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_23_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifftn - -tests/test_linalg.py::test_eig_arange[2-float64] -tests/test_linalg.py::test_eig_arange[2-float32] -tests/test_linalg.py::test_eig_arange[2-int64] -tests/test_linalg.py::test_eig_arange[2-int32] -tests/test_linalg.py::test_eig_arange[4-float64] -tests/test_linalg.py::test_eig_arange[4-float32] -tests/test_linalg.py::test_eig_arange[4-int64] -tests/test_linalg.py::test_eig_arange[4-int32] -tests/test_linalg.py::test_eig_arange[8-float64] -tests/test_linalg.py::test_eig_arange[8-float32] -tests/test_linalg.py::test_eig_arange[8-int64] -tests/test_linalg.py::test_eig_arange[8-int32] -tests/test_linalg.py::test_eig_arange[16-float64] -tests/test_linalg.py::test_eig_arange[16-float32] -tests/test_linalg.py::test_eig_arange[16-int64] -tests/test_linalg.py::test_eig_arange[16-int32] -tests/test_linalg.py::test_eig_arange[300-float64] -tests/test_linalg.py::test_eig_arange[300-float32] -tests/test_linalg.py::test_eig_arange[300-int64] -tests/test_linalg.py::test_eig_arange[300-int32] 
-tests/test_linalg.py::test_eigvals tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory @@ -113,22 +82,120 @@ tests/test_dparray.py::test_astype[[]-complex-int32] tests/test_dparray.py::test_astype[[]-complex-bool] tests/test_dparray.py::test_astype[[]-complex-bool_] tests/test_dparray.py::test_astype[[]-complex-complex] -tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] + +tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[None-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] +tests/test_linalg.py::test_cond[1-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] tests/test_linalg.py::test_cond[1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_cond[-2-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[-1-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[2-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] -tests/test_linalg.py::test_cond[-2-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[2-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] +tests/test_linalg.py::test_cond[-2-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[-2-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] +tests/test_linalg.py::test_cond[numpy.inf-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] +tests/test_linalg.py::test_cond[-numpy.inf-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[-numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond["fro"-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] tests/test_linalg.py::test_cond["fro"-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] -tests/test_linalg.py::test_cond[None-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_cond[-numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_cond[numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_svd[(2,2)-complex128] -tests/test_linalg.py::test_svd[(3,4)-complex128] -tests/test_linalg.py::test_svd[(5,3)-complex128] -tests/test_linalg.py::test_svd[(16,16)-complex128] + +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-float64] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-float32] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-int64] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-int32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-float64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-float32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-int64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-int32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-float64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-float32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int32] + +tests/test_linalg.py::test_norm1[0-None-[7]] +tests/test_linalg.py::test_norm1[0-None-[1, 2]] +tests/test_linalg.py::test_norm1[0-None-[1, 0]] +tests/test_linalg.py::test_norm1[0-3-[7]] +tests/test_linalg.py::test_norm1[0-3-[1, 2]] +tests/test_linalg.py::test_norm1[0-3-[1, 0]] +tests/test_linalg.py::test_norm1[None-3-[7]] +tests/test_linalg.py::test_norm1[None-3-[1, 2]] +tests/test_linalg.py::test_norm1[None-3-[1, 0]] + +tests/test_linalg.py::test_norm2[(0, 1)-None-[[1, 0]]] 
+tests/test_linalg.py::test_norm2[(0, 1)-None-[[1, 2]]] +tests/test_linalg.py::test_norm2[(0, 1)-None-[[1, 0], [3, 0]]] +tests/test_linalg.py::test_norm2[(0, 1)-None-[[1, 2], [3, 4]]] +tests/test_linalg.py::test_norm2[(0, 1)-"fro"-[[1, 0]]] +tests/test_linalg.py::test_norm2[(0, 1)-"fro"-[[1, 2]]] +tests/test_linalg.py::test_norm2[(0, 1)-"fro"-[[1, 0], [3, 0]]] +tests/test_linalg.py::test_norm2[(0, 1)-"fro"-[[1, 2], [3, 4]]] +tests/test_linalg.py::test_norm2[None-None-[[1, 2]]] +tests/test_linalg.py::test_norm2[None-None-[[1, 0], [3, 0]]] +tests/test_linalg.py::test_norm2[None-None-[[1, 2], [3, 4]]] +tests/test_linalg.py::test_norm2[None-"fro"-[[1, 2]]] +tests/test_linalg.py::test_norm2[None-"fro"-[[1, 0], [3, 0]]] +tests/test_linalg.py::test_norm2[None-"fro"-[[1, 2], [3, 4]]] + +tests/test_linalg.py::test_norm3[0-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[0-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[0--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[0--2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[0--1-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[0--1-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[0-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[0-2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[1-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[1-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[1--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[1--2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[1--1-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[1--1-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[1-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[1-2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[2-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[2-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[2--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[2--1-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[2-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 1)-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 1)-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(0, 1)--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 1)--2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(0, 1)-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 1)-2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(0, 2)-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 2)-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(0, 2)--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 2)-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 2)-2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(1, 2)-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(1, 2)-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(1, 2)--2-[[[1, 2], [3, 4]], [[5, 
6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(1, 2)-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(1, 2)-2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] + +tests/test_linalg.py::test_qr[complete-(2,2)-float64] +tests/test_linalg.py::test_qr[complete-(3,4)-float64] +tests/test_linalg.py::test_qr[complete-(3,4)-int64] +tests/test_linalg.py::test_qr[complete-(3,4)-int32] +tests/test_linalg.py::test_qr[complete-(5,3)-float64] +tests/test_linalg.py::test_qr[complete-(5,3)-int64] +tests/test_linalg.py::test_qr[complete-(5,3)-int32] +tests/test_linalg.py::test_qr[complete-(16,16)-float64] +tests/test_linalg.py::test_qr[complete-(16,16)-int64] +tests/test_linalg.py::test_qr[complete-(16,16)-int32] +tests/test_linalg.py::test_qr[reduced-(2,2)-float64] +tests/test_linalg.py::test_qr[reduced-(3,4)-float64] +tests/test_linalg.py::test_qr[reduced-(5,3)-float64] +tests/test_linalg.py::test_qr[reduced-(16,16)-float64] + +tests/test_linalg.py::test_svd[(2,2)-float64] +tests/test_linalg.py::test_svd[(3,4)-float64] +tests/test_linalg.py::test_svd[(5,3)-float64] +tests/test_linalg.py::test_svd[(16,16)-float64] + tests/test_mathematical.py::TestGradient::test_gradient_y1_dx[3.5-array1] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp.asarray([(i, i) for i in x], [("a", int), ("b", int)]).view(dpnp.recarray))] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]] diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index d41fe24c3c70..eed4bafdb32a 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -91,47 +91,6 @@ tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25 tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22] tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-remainder-data15-data25] -tests/test_fft.py::test_fft_ndim[None-shape3-float32] -tests/test_fft.py::test_fft_ndim[None-shape3-float64] -tests/test_fft.py::test_fft_ndim[None-shape3-int32] -tests/test_fft.py::test_fft_ndim[None-shape3-int64] -tests/test_fft.py::test_fft_ndim[forward-shape3-float32] -tests/test_fft.py::test_fft_ndim[forward-shape3-float64] -tests/test_fft.py::test_fft_ndim[forward-shape3-int32] -tests/test_fft.py::test_fft_ndim[forward-shape3-int64] -tests/test_fft.py::test_fft_ndim[ortho-shape3-float32] -tests/test_fft.py::test_fft_ndim[ortho-shape3-float64] -tests/test_fft.py::test_fft_ndim[ortho-shape3-int32] -tests/test_fft.py::test_fft_ndim[ortho-shape3-int64] -tests/test_fft.py::test_fft_ifft[None-shape4-float32] -tests/test_fft.py::test_fft_ifft[None-shape4-float64] -tests/test_fft.py::test_fft_ifft[None-shape4-int32] -tests/test_fft.py::test_fft_ifft[None-shape4-int64] -tests/test_fft.py::test_fft_ifft[forward-shape4-float32] -tests/test_fft.py::test_fft_ifft[forward-shape4-float64] -tests/test_fft.py::test_fft_ifft[forward-shape4-int32] -tests/test_fft.py::test_fft_ifft[forward-shape4-int64] -tests/test_fft.py::test_fft_ifft[ortho-shape4-float32] -tests/test_fft.py::test_fft_ifft[ortho-shape4-float64] -tests/test_fft.py::test_fft_ifft[ortho-shape4-int32] -tests/test_fft.py::test_fft_ifft[ortho-shape4-int64] -tests/test_fft.py::test_fft_rfft[shape1-float32] -tests/test_fft.py::test_fft_rfft[shape1-float64] -tests/test_fft.py::test_fft_rfft[shape1-int32] -tests/test_fft.py::test_fft_rfft[shape1-int64] -tests/test_fft.py::test_fft_rfft[shape2-float32] 
-tests/test_fft.py::test_fft_rfft[shape2-float64] -tests/test_fft.py::test_fft_rfft[shape2-int32] -tests/test_fft.py::test_fft_rfft[shape2-int64] -tests/test_fft.py::test_fft_rfft[shape3-float32] -tests/test_fft.py::test_fft_rfft[shape3-float64] -tests/test_fft.py::test_fft_rfft[shape3-int32] -tests/test_fft.py::test_fft_rfft[shape3-int64] -tests/test_fft.py::test_fft_rfft[shape4-float32] -tests/test_fft.py::test_fft_rfft[shape4-float64] -tests/test_fft.py::test_fft_rfft[shape4-int32] -tests/test_fft.py::test_fft_rfft[shape4-int64] - tests/test_indexing.py::test_nonzero[[[1, 0], [1, 0]]] tests/test_indexing.py::test_nonzero[[[1, 2], [3, 4]]] tests/test_indexing.py::test_nonzero[[[0, 1, 2], [3, 0, 5], [6, 7, 0]]] @@ -380,10 +339,6 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{extern tests/third_party/cupy/statistics_tests/test_correlation.py::TestCov::test_cov_empty tests/third_party/cupy/statistics_tests/test_meanvar.py::TestMeanVar::test_external_mean_axis -tests/test_linalg.py::test_eig_arange[16-float64] -tests/test_linalg.py::test_eig_arange[16-float32] -tests/test_linalg.py::test_eig_arange[16-int64] -tests/test_linalg.py::test_eig_arange[16-int32] tests/test_random.py::test_randn_normal_distribution tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_outer tests/third_party/cupy/random_tests/test_sample.py::TestRandintDtype::test_dtype @@ -445,6 +400,7 @@ tests/test_dparray.py::test_astype[[]-complex-int32] tests/test_dparray.py::test_astype[[]-complex-bool] tests/test_dparray.py::test_astype[[]-complex-bool_] tests/test_dparray.py::test_astype[[]-complex-complex] + tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[-2-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] @@ -457,10 +413,20 @@ tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] tests/test_linalg.py::test_cond[None-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[-numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_eig_arange[300-float32] -tests/test_linalg.py::test_eig_arange[300-float64] -tests/test_linalg.py::test_eig_arange[300-int32] -tests/test_linalg.py::test_eig_arange[300-int64] + +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-float64] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-float32] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-int64] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-int32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-float64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-float32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-int64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-int32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-float64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-float32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int32] + tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp.asarray([(i, i) for i in x], [("a", int), ("b", int)]).view(dpnp.recarray))] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]] 
tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)] @@ -739,54 +705,30 @@ tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_ tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_mixed_start_stop tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_mixed_start_stop2 tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_start_stop_list + +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_18_{axes=None, norm=None, s=None, shape=(0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_18_{axes=None, norm=None, s=None, shape=(0, 5)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_19_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_19_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_20_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_20_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_ifftn + +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fftn 
+tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_17_{axes=(), norm='ortho', s=None, shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_17_{axes=(), norm='ortho', s=None, shape=(2, 3, 4)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_18_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_18_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_ifftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_21_{axes=None, norm=None, s=None, shape=(0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_21_{axes=None, norm=None, s=None, shape=(0, 5)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_22_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_22_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_23_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_23_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_2_{n=None, norm=None, shape=(10,)}::test_fft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_2_{n=None, norm=None, shape=(10,)}::test_ifft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_2_{n=None, norm=None, shape=(10,)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_2_{n=None, norm=None, shape=(10,)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_6_{n=None, norm='ortho', shape=(10,)}::test_fft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_6_{n=None, norm='ortho', shape=(10,)}::test_ifft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_6_{n=None, norm='ortho', shape=(10,)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_6_{n=None, norm='ortho', shape=(10,)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_3_{n=None, norm=None, shape=(10, 10)}::test_fft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_3_{n=None, norm=None, shape=(10, 10)}::test_ifft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_7_{n=None, norm='ortho', shape=(10, 10)}::test_fft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_7_{n=None, norm='ortho', shape=(10, 10)}::test_ifft -tests/third_party/cupy/fft_tests/test_fft.py::TestRfft_param_0_{n=None, norm=None, shape=(10,)}::test_irfft -tests/third_party/cupy/fft_tests/test_fft.py::TestRfft_param_0_{n=None, norm=None, shape=(10,)}::test_rfft 
-tests/third_party/cupy/fft_tests/test_fft.py::TestRfft_param_1_{n=None, norm=None, shape=(10, 10)}::test_irfft -tests/third_party/cupy/fft_tests/test_fft.py::TestRfft_param_1_{n=None, norm=None, shape=(10, 10)}::test_rfft tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_AxisConcatenator_init1 tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_len diff --git a/tests/test_fft.py b/tests/test_fft.py index 66019defd1ac..f1065cc70fc6 100644 --- a/tests/test_fft.py +++ b/tests/test_fft.py @@ -1,63 +1,60 @@ -import pytest - -import dpnp - -import numpy - - -@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) -@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) -def test_fft(type, norm): - # 1 dim array - data = numpy.arange(100, dtype=numpy.dtype(type)) - # TODO: - # doesn't work correct with `complex64` (not supported) - # dpnp_data = dpnp.arange(100, dtype=dpnp.dtype(type)) - dpnp_data = dpnp.array(data) - - np_res = numpy.fft.fft(data, norm=norm) - dpnp_res = dpnp.asnumpy(dpnp.fft.fft(dpnp_data, norm=norm)) - - numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) - assert dpnp_res.dtype == np_res.dtype - - -@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) -@pytest.mark.parametrize("shape", [(8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) -@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) -def test_fft_ndim(type, shape, norm): - np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) - dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) - - np_res = numpy.fft.fft(np_data, norm=norm) - dpnp_res = dpnp.fft.fft(dpnp_data, norm=norm) - - numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) - assert dpnp_res.dtype == np_res.dtype - - -@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) -@pytest.mark.parametrize("shape", [(64,), (8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) -@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) -def test_fft_ifft(type, shape, norm): - np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) - dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) - - np_res = numpy.fft.ifft(np_data, norm=norm) - dpnp_res = dpnp.fft.ifft(dpnp_data, norm=norm) - - numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) - assert dpnp_res.dtype == np_res.dtype - - -@pytest.mark.parametrize("type", ['float32', 'float64', 'int32', 'int64']) -@pytest.mark.parametrize("shape", [(64, ), (8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) -def test_fft_rfft(type, shape): - np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) - dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) - - np_res = numpy.fft.rfft(np_data) - dpnp_res = dpnp.fft.rfft(dpnp_data) - - numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) - assert dpnp_res.dtype == np_res.dtype +import pytest + +import dpnp + +import numpy + + +@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) +@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) +def test_fft(type, norm): + # 1 dim array + data = numpy.arange(100, dtype=numpy.dtype(type)) + dpnp_data = dpnp.array(data) + + np_res = numpy.fft.fft(data, norm=norm) + dpnp_res = dpnp.asnumpy(dpnp.fft.fft(dpnp_data, norm=norm)) + + 
numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) + assert dpnp_res.dtype == np_res.dtype + + +@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) +@pytest.mark.parametrize("shape", [(8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) +@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) +def test_fft_ndim(type, shape, norm): + np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) + dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) + + np_res = numpy.fft.fft(np_data, norm=norm) + dpnp_res = dpnp.fft.fft(dpnp_data, norm=norm) + + numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) + assert dpnp_res.dtype == np_res.dtype + + +@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) +@pytest.mark.parametrize("shape", [(64,), (8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) +@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) +def test_fft_ifft(type, shape, norm): + np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) + dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) + + np_res = numpy.fft.ifft(np_data, norm=norm) + dpnp_res = dpnp.fft.ifft(dpnp_data, norm=norm) + + numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) + assert dpnp_res.dtype == np_res.dtype + + +@pytest.mark.parametrize("type", ['float32', 'float64', 'int32', 'int64']) +@pytest.mark.parametrize("shape", [(64, ), (8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) +def test_fft_rfft(type, shape): + np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) + dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) + + np_res = numpy.fft.rfft(np_data) + dpnp_res = dpnp.fft.rfft(dpnp_data) + + numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) + assert dpnp_res.dtype == np_res.dtype diff --git a/tests/test_linalg.py b/tests/test_linalg.py index ecd12040fd5d..b9535a7b274f 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -2,10 +2,19 @@ import dpnp as inp +import dpctl import numpy def vvsort(val, vec, size, xp): + val_kwargs = dict() + if hasattr(val, 'sycl_queue'): + val_kwargs['sycl_queue'] = getattr(val, "sycl_queue", None) + + vec_kwargs = dict() + if hasattr(vec, 'sycl_queue'): + vec_kwargs['sycl_queue'] = getattr(vec, "sycl_queue", None) + for i in range(size): imax = i for j in range(i + 1, size): @@ -17,16 +26,15 @@ def vvsort(val, vec, size, xp): unravel_i = numpy.unravel_index(i, val.shape) unravel_imax = numpy.unravel_index(imax, val.shape) - temp = xp.empty(tuple(), dtype=vec.dtype) - temp[()] = val[unravel_i] # make a copy + # swap elements in val array + temp = xp.array(val[unravel_i], dtype=vec.dtype, **val_kwargs) val[unravel_i] = val[unravel_imax] val[unravel_imax] = temp - for k in range(size): - temp = xp.empty(tuple(), dtype=val.dtype) - temp[()] = vec[k, i] # make a copy - vec[k, i] = vec[k, imax] - vec[k, imax] = temp + # swap corresponding columns in vec matrix + temp = xp.array(vec[:, i], dtype=val.dtype, **vec_kwargs) + vec[:, i] = vec[:, imax] + vec[:, imax] = temp @pytest.mark.parametrize("array", @@ -83,6 +91,9 @@ def test_det(array): @pytest.mark.parametrize("size", [2, 4, 8, 16, 300]) def test_eig_arange(type, size): + if dpctl.get_current_device_type() != dpctl.device_type.gpu: + pytest.skip("eig function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005") + a = numpy.arange(size * size, dtype=type).reshape((size, size)) 
symm_orig = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype=type)) symm = symm_orig @@ -115,14 +126,20 @@ def test_eig_arange(type, size): numpy.testing.assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05) -def test_eigvals(): +@pytest.mark.parametrize("type", + [numpy.float64, numpy.float32, numpy.int64, numpy.int32], + ids=['float64', 'float32', 'int64', 'int32']) +def test_eigvals(type): + if dpctl.get_current_device_type() != dpctl.device_type.gpu: + pytest.skip("eigvals function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005") + arrays = [ [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]] ] for array in arrays: - a = numpy.array(array) + a = numpy.array(array, dtype=type) ia = inp.array(a) result = inp.linalg.eigvals(ia) expected = numpy.linalg.eigvals(a) @@ -143,23 +160,23 @@ def test_inv(type, array): numpy.testing.assert_allclose(expected, result) -def test_matrix_rank(): - arrays = [ - [0, 0], - # [0, 1], - [1, 2], - [[0, 0], [0, 0]], - # [[1, 2], [1, 2]], - # [[1, 2], [3, 4]], - ] - tols = [None] - for array in arrays: - for tol in tols: - a = numpy.array(array) - ia = inp.array(a) - result = inp.linalg.matrix_rank(ia, tol=tol) - expected = numpy.linalg.matrix_rank(a, tol=tol) - numpy.testing.assert_array_equal(expected, result) +@pytest.mark.parametrize("type", + [numpy.float64, numpy.float32, numpy.int64, numpy.int32], + ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("array", + [[0, 0], [0, 1], [1, 2], [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]]], + ids=['[0, 0]', '[0, 1]', '[1, 2]', '[[0, 0], [0, 0]]', '[[1, 2], [1, 2]]', '[[1, 2], [3, 4]]']) +@pytest.mark.parametrize("tol", + [None], + ids=['None']) +def test_matrix_rank(type, tol, array): + a = numpy.array(array, dtype=type) + ia = inp.array(a) + + result = inp.linalg.matrix_rank(ia, tol=tol) + expected = numpy.linalg.matrix_rank(a, tol=tol) + + numpy.testing.assert_allclose(expected, result) @pytest.mark.parametrize("array", diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 1f625d408014..adffc3c0a3ca 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -41,6 +41,37 @@ def assert_sycl_queue_equal(result, expected): assert exec_queue is not None +def vvsort(val, vec, size, xp): + val_kwargs = dict() + if hasattr(val, 'sycl_queue'): + val_kwargs['sycl_queue'] = getattr(val, "sycl_queue", None) + + vec_kwargs = dict() + if hasattr(vec, 'sycl_queue'): + vec_kwargs['sycl_queue'] = getattr(vec, "sycl_queue", None) + + for i in range(size): + imax = i + for j in range(i + 1, size): + unravel_imax = numpy.unravel_index(imax, val.shape) + unravel_j = numpy.unravel_index(j, val.shape) + if xp.abs(val[unravel_imax]) < xp.abs(val[unravel_j]): + imax = j + + unravel_i = numpy.unravel_index(i, val.shape) + unravel_imax = numpy.unravel_index(imax, val.shape) + + # swap elements in val array + temp = xp.array(val[unravel_i], dtype=vec.dtype, **val_kwargs) + val[unravel_i] = val[unravel_imax] + val[unravel_imax] = temp + + # swap corresponding columns in vec matrix + temp = xp.array(vec[:, i], dtype=val.dtype, **vec_kwargs) + vec[:, i] = vec[:, imax] + vec[:, imax] = temp + + @pytest.mark.parametrize( "func,data", [ @@ -104,7 +135,6 @@ def test_1in_1out(func, data, device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) - assert result_queue.sycl_device == expected_queue.sycl_device @pytest.mark.parametrize( @@ -169,7 +199,6 @@ def 
test_2in_1out(func, data1, data2, device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) - assert result_queue.sycl_device == expected_queue.sycl_device @pytest.mark.parametrize( @@ -216,7 +245,6 @@ def test_broadcasting(func, data1, data2, device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) - assert result_queue.sycl_device == expected_queue.sycl_device @pytest.mark.parametrize( @@ -277,7 +305,6 @@ def test_out(func, data1, data2, device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) - assert result_queue.sycl_device == expected_queue.sycl_device @pytest.mark.parametrize("device", @@ -302,8 +329,255 @@ def test_modf(device): assert_sycl_queue_equal(result1_queue, expected_queue) assert_sycl_queue_equal(result2_queue, expected_queue) - assert result1_queue.sycl_device == expected_queue.sycl_device - assert result2_queue.sycl_device == expected_queue.sycl_device + +@pytest.mark.parametrize("type", ['complex128']) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_fft(type, device): + data = numpy.arange(100, dtype=numpy.dtype(type)) + + dpnp_data = dpnp.array(data, device=device) + + expected = numpy.fft.fft(data) + result = dpnp.fft.fft(dpnp_data) + + numpy.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-7) + + expected_queue = dpnp_data.get_array().sycl_queue + result_queue = result.get_array().sycl_queue + + assert_sycl_queue_equal(result_queue, expected_queue) + + +@pytest.mark.parametrize("type", ['float32']) +@pytest.mark.parametrize("shape", [(8,8)]) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_fft_rfft(type, shape, device): + np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) + dpnp_data = dpnp.array(np_data, device=device) + + np_res = numpy.fft.rfft(np_data) + dpnp_res = dpnp.fft.rfft(dpnp_data) + + numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) + assert dpnp_res.dtype == np_res.dtype + + expected_queue = dpnp_data.get_array().sycl_queue + result_queue = dpnp_res.get_array().sycl_queue + + assert_sycl_queue_equal(result_queue, expected_queue) + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_cholesky(device): + data = [[[1., -2.], [2., 5.]], [[1., -2.], [2., 5.]]] + numpy_data = numpy.array(data) + dpnp_data = dpnp.array(data, device=device) + + result = dpnp.linalg.cholesky(dpnp_data) + expected = numpy.linalg.cholesky(numpy_data) + numpy.testing.assert_array_equal(expected, result) + + expected_queue = dpnp_data.get_array().sycl_queue + result_queue = result.get_array().sycl_queue + + assert_sycl_queue_equal(result_queue, expected_queue) + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_det(device): + data = [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]] + numpy_data = numpy.array(data) + dpnp_data = dpnp.array(data, device=device) + + result = dpnp.linalg.det(dpnp_data) + expected = numpy.linalg.det(numpy_data) + numpy.testing.assert_allclose(expected, result) + + expected_queue = dpnp_data.get_array().sycl_queue + result_queue = result.get_array().sycl_queue + + assert_sycl_queue_equal(result_queue, expected_queue) + + +@pytest.mark.parametrize("device", + 
valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_eig(device): + if device.device_type != dpctl.device_type.gpu: + pytest.skip("eig function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005") + + size = 4 + a = numpy.arange(size * size, dtype='float64').reshape((size, size)) + symm_orig = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype='float64')) + numpy_data = symm_orig + dpnp_symm_orig = dpnp.array(numpy_data, device=device) + dpnp_data = dpnp_symm_orig + + dpnp_val, dpnp_vec = dpnp.linalg.eig(dpnp_data) + numpy_val, numpy_vec = numpy.linalg.eig(numpy_data) + + # DPNP sort val/vec by abs value + vvsort(dpnp_val, dpnp_vec, size, dpnp) + + # NP sort val/vec by abs value + vvsort(numpy_val, numpy_vec, size, numpy) + + # NP change sign of vectors + for i in range(numpy_vec.shape[1]): + if numpy_vec[0, i] * dpnp_vec[0, i] < 0: + numpy_vec[:, i] = -numpy_vec[:, i] + + numpy.testing.assert_allclose(dpnp_val, numpy_val, rtol=1e-05, atol=1e-05) + numpy.testing.assert_allclose(dpnp_vec, numpy_vec, rtol=1e-05, atol=1e-05) + + assert (dpnp_val.dtype == numpy_val.dtype) + assert (dpnp_vec.dtype == numpy_vec.dtype) + assert (dpnp_val.shape == numpy_val.shape) + assert (dpnp_vec.shape == numpy_vec.shape) + + expected_queue = dpnp_data.get_array().sycl_queue + dpnp_val_queue = dpnp_val.get_array().sycl_queue + dpnp_vec_queue = dpnp_vec.get_array().sycl_queue + + # compare queue and device + assert_sycl_queue_equal(dpnp_val_queue, expected_queue) + assert_sycl_queue_equal(dpnp_vec_queue, expected_queue) + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_eigvals(device): + if device.device_type != dpctl.device_type.gpu: + pytest.skip("eigvals function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005") + + data = [[0, 0], [0, 0]] + numpy_data = numpy.array(data) + dpnp_data = dpnp.array(data, device=device) + + result = dpnp.linalg.eigvals(dpnp_data) + expected = numpy.linalg.eigvals(numpy_data) + numpy.testing.assert_allclose(expected, result, atol=0.5) + + expected_queue = dpnp_data.get_array().sycl_queue + result_queue = result.get_array().sycl_queue + + assert_sycl_queue_equal(result_queue, expected_queue) + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_inv(device): + data = [[1., 2.], [3., 4.]] + numpy_data = numpy.array(data) + dpnp_data = dpnp.array(data, device=device) + + result = dpnp.linalg.inv(dpnp_data) + expected = numpy.linalg.inv(numpy_data) + numpy.testing.assert_allclose(expected, result) + + expected_queue = dpnp_data.get_array().sycl_queue + result_queue = result.get_array().sycl_queue + + assert_sycl_queue_equal(result_queue, expected_queue) + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_matrix_rank(device): + data = [[0, 0], [0, 0]] + numpy_data = numpy.array(data) + dpnp_data = dpnp.array(data, device=device) + + result = dpnp.linalg.matrix_rank(dpnp_data) + expected = numpy.linalg.matrix_rank(numpy_data) + numpy.testing.assert_array_equal(expected, result) + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_qr(device): + tol = 1e-11 + data = [[1,2,3], [1,2,3]] + numpy_data = numpy.array(data) + dpnp_data = dpnp.array(data, device=device) + + np_q, np_r = 
numpy.linalg.qr(numpy_data, "reduced") + dpnp_q, dpnp_r = dpnp.linalg.qr(dpnp_data, "reduced") + + assert (dpnp_q.dtype == np_q.dtype) + assert (dpnp_r.dtype == np_r.dtype) + assert (dpnp_q.shape == np_q.shape) + assert (dpnp_r.shape == np_r.shape) + + numpy.testing.assert_allclose(dpnp_q, np_q, rtol=tol, atol=tol) + numpy.testing.assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol) + + expected_queue = dpnp_data.get_array().sycl_queue + dpnp_q_queue = dpnp_q.get_array().sycl_queue + dpnp_r_queue = dpnp_r.get_array().sycl_queue + + # compare queue and device + assert_sycl_queue_equal(dpnp_q_queue, expected_queue) + assert_sycl_queue_equal(dpnp_r_queue, expected_queue) + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_svd(device): + tol = 1e-12 + shape = (2,2) + numpy_data = numpy.arange(shape[0] * shape[1]).reshape(shape) + dpnp_data = dpnp.arange(shape[0] * shape[1]).reshape(shape) + np_u, np_s, np_vt = numpy.linalg.svd(numpy_data) + dpnp_u, dpnp_s, dpnp_vt = dpnp.linalg.svd(dpnp_data) + + assert (dpnp_u.dtype == np_u.dtype) + assert (dpnp_s.dtype == np_s.dtype) + assert (dpnp_vt.dtype == np_vt.dtype) + assert (dpnp_u.shape == np_u.shape) + assert (dpnp_s.shape == np_s.shape) + assert (dpnp_vt.shape == np_vt.shape) + + # check decomposition + dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype) + for i in range(dpnp_s.size): + dpnp_diag_s[i, i] = dpnp_s[i] + + # check decomposition + numpy.testing.assert_allclose(dpnp_data, dpnp.dot(dpnp_u, dpnp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol) + + for i in range(min(shape[0], shape[1])): + if np_u[0, i] * dpnp_u[0, i] < 0: + np_u[:, i] = -np_u[:, i] + np_vt[i, :] = -np_vt[i, :] + + # compare vectors for non-zero values + for i in range(numpy.count_nonzero(np_s > tol)): + numpy.testing.assert_allclose(dpnp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol) + numpy.testing.assert_allclose(dpnp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol) + + expected_queue = dpnp_data.get_array().sycl_queue + dpnp_u_queue = dpnp_u.get_array().sycl_queue + dpnp_s_queue = dpnp_s.get_array().sycl_queue + dpnp_vt_queue = dpnp_vt.get_array().sycl_queue + + # compare queue and device + assert_sycl_queue_equal(dpnp_u_queue, expected_queue) + assert_sycl_queue_equal(dpnp_s_queue, expected_queue) + assert_sycl_queue_equal(dpnp_vt_queue, expected_queue) @pytest.mark.parametrize("device_from", From 384a4baf9caaaa19afd9733b0d29531e49ae0817 Mon Sep 17 00:00:00 2001 From: Evseniia Komarova Date: Wed, 19 Oct 2022 16:29:48 +0200 Subject: [PATCH 29/30] update numpy restriction by adding numpy 1.22 (#1207) * update numpy restriction by adding numpy 1.22 * Update conda-recipe/meta.yaml Also exclude alpha/beta builds of 1.23 sources Co-authored-by: Oleksandr Pavlyk --- conda-recipe/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index dccf855c184c..8a42ff3f0c7d 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -8,7 +8,7 @@ requirements: host: - python - setuptools - - numpy >=1.19,<1.22a0 + - numpy >=1.19,<1.23a0 - cython - cmake >=3.19 - dpctl >=0.13 From 78c4b8b02f6907a415c7d963ba21181592f2e525 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 20 Oct 2022 16:23:30 +0200 Subject: [PATCH 30/30] Get rid of env var which handles dep on dpctl.tensor (#1205) * Get rid of env var stating dep on dpctl.tensor * Apply review comments --- dpnp/config.py | 7 
+------ dpnp/dpnp_array.py | 4 +++- dpnp/dpnp_iface.py | 25 ++++++++++++------------- dpnp/dpnp_utils/dpnp_algo_utils.pyx | 8 ++++---- 4 files changed, 20 insertions(+), 24 deletions(-) diff --git a/dpnp/config.py b/dpnp/config.py index a9a6b6ec7c52..9298994a8421 100644 --- a/dpnp/config.py +++ b/dpnp/config.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -43,11 +43,6 @@ Explicitly use NumPy.ndarray as return type for creation functions ''' -__DPNP_OUTPUT_DPCTL__ = int(os.getenv('DPNP_OUTPUT_DPCTL', 1)) -''' -Explicitly use DPCtl package container as return type for creation functions -''' - __DPNP_OUTPUT_DPCTL_DEFAULT_SHARED__ = int(os.getenv('DPNP_OUTPUT_DPCTL_DEFAULT_SHARED', 0)) ''' Explicitly use SYCL shared memory parameter in DPCtl array constructor for creation functions diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 2b779f57b142..8f5114af1a6a 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -443,7 +443,9 @@ def astype(self, dtype, order='K', casting='unsafe', subok=True, copy=True): """ - return dpnp.astype(self, dtype, order, casting, subok, copy) + new_array = self.__new__(dpnp_array) + new_array._array_obj = dpt.astype(self._array_obj, dtype, order=order, casting=casting, copy=copy) + return new_array # 'base', # 'byteswap', diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 5aa69ced4aec..f1fda6168fb3 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -49,6 +49,7 @@ import dpctl import dpctl.tensor as dpt +from dpnp.dpnp_array import dpnp_array from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * from dpnp.fft import * @@ -137,26 +138,24 @@ def asnumpy(input, order='C'): This function works exactly the same as :obj:`numpy.asarray`. 
""" + if isinstance(input, dpnp_array): + return dpt.asnumpy(input.get_array()) - if isinstance(input, dpctl.tensor.usm_ndarray): - return dpctl.tensor.to_numpy(input) - - if config.__DPNP_OUTPUT_DPCTL__ and hasattr(input, "__sycl_usm_array_interface__"): - return dpctl.tensor.to_numpy(input.get_array()) + if isinstance(input, dpt.usm_ndarray): + return dpt.asnumpy(input) return numpy.asarray(input, order=order) def astype(x1, dtype, order='K', casting='unsafe', subok=True, copy=True): """Copy the array with data type casting.""" - if config.__DPNP_OUTPUT_DPCTL__ and hasattr(x1, "__sycl_usm_array_interface__"): - import dpctl.tensor as dpt - # TODO: remove check dpctl.tensor has attribute "astype" - if hasattr(dpt, "astype"): - # return dpt.astype(x1, dtype, order=order, casting=casting, copy=copy) - return dpt.astype(x1.get_array(), dtype, order=order, casting=casting, copy=copy) - - x1_desc = get_dpnp_descriptor(x1) + if isinstance(x1, dpnp_array): + return x1.astype(dtype, order=order, casting=casting, copy=copy) + + if isinstance(x1, dpt.usm_ndarray): + return dpt.astype(x1, dtype, order=order, casting=casting, copy=copy) + + x1_desc = get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if not x1_desc: pass elif order != 'K': diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index aac77164b911..13b2d2e4fdb5 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -100,10 +100,10 @@ def convert_list_args(input_list): def copy_from_origin(dst, src): """Copy origin result to output result.""" - if config.__DPNP_OUTPUT_DPCTL__ and hasattr(dst, "__sycl_usm_array_interface__"): + if hasattr(dst, "__sycl_usm_array_interface__"): if src.size: - # dst.usm_data.copy_from_host(src.reshape(-1).view("|u1")) - dpctl.tensor._copy_utils._copy_from_numpy_into(unwrap_array(dst), src) + dst_dpt = unwrap_array(dst) + dst_dpt[...] = src else: for i in range(dst.size): dst.flat[i] = src.item(i)