From 7bd11b2b80644c0e627d3573e823919028afa213 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Fri, 4 Feb 2022 03:09:25 -0600 Subject: [PATCH 01/18] Extend main cython funcs with queues and events --- dpnp/backend/include/dpnp_iface_fptr.hpp | 1 - .../kernels/dpnp_krnl_arraycreation.cpp | 4 +- dpnp/backend/kernels/dpnp_krnl_bitwise.cpp | 13 +- dpnp/backend/kernels/dpnp_krnl_common.cpp | 16 +- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 19 +- .../kernels/dpnp_krnl_mathematical.cpp | 3 +- dpnp/dpnp_algo/dpnp_algo.pxd | 219 ++++++++++++++-- dpnp/dpnp_algo/dpnp_algo.pyx | 229 +++++++++++----- dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx | 245 +++++++++++++++--- dpnp/dpnp_algo/dpnp_algo_bitwise.pyx | 12 +- dpnp/dpnp_algo/dpnp_algo_counting.pyx | 2 +- dpnp/dpnp_algo/dpnp_algo_manipulation.pyx | 34 ++- dpnp/dpnp_algo/dpnp_algo_mathematical.pyx | 90 ++++--- dpnp/dpnp_algo/dpnp_algo_sorting.pyx | 4 +- dpnp/dpnp_algo/dpnp_algo_special.pyx | 2 +- dpnp/dpnp_algo/dpnp_algo_statistics.pyx | 31 ++- dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx | 50 ++-- 17 files changed, 736 insertions(+), 238 deletions(-) diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index e79e5d2a1c24..8e33fa4db9bb 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -379,7 +379,6 @@ enum class DPNPFuncName : size_t DPNP_FN_ZEROS_LIKE, /**< Used in numpy.zeros_like() impl */ DPNP_FN_ZEROS_LIKE_EXT, /**< Used in numpy.zeros_like() impl, requires extra parameters */ DPNP_FN_LAST, /**< The latest element of the enumeration */ - DPNP_FN_LAST_EXT /**< The latest element of the enumeration, requires extra parameters */ }; /** diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp index d359f7a34de3..9f1b742c84e1 100644 --- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp @@ -73,9 +73,9 @@ 
DPCTLSyclEventRef dpnp_arange_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index d96bc79aa867..4d0f6498ed0f 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -68,9 +68,9 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template @@ -101,6 +101,9 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_INVERT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_default_c}; fmap[DPNPFuncName::DPNP_FN_INVERT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_default_c}; + fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_ext_c}; + return; } @@ -209,7 +212,6 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) cgh.parallel_for>(gws, kernel_parallel_for_func); \ }; \ event = q.submit(kernel_func); \ - event.wait(); \ } \ else \ { \ @@ -223,9 +225,10 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) cgh.parallel_for>(gws, kernel_parallel_for_func); \ }; \ event = q.submit(kernel_func); \ - event.wait(); \ } \ - return event_ref; \ + event_ref = reinterpret_cast(&event); \ + \ + return DPCTLEvent_Copy(event_ref); \ } \ \ template \ diff --git a/dpnp/backend/kernels/dpnp_krnl_common.cpp b/dpnp/backend/kernels/dpnp_krnl_common.cpp index 95bbe90b8826..4c671f45604d 100644 --- a/dpnp/backend/kernels/dpnp_krnl_common.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_common.cpp @@ -81,9 +81,9 @@ DPCTLSyclEventRef dpnp_astype_c(DPCTLSyclQueueRef q_ref, 
event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template @@ -732,9 +732,9 @@ DPCTLSyclEventRef dpnp_initval_c(DPCTLSyclQueueRef q_ref, sycl::event event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template @@ -756,10 +756,10 @@ void (*dpnp_initval_default_c)(void*, void*, size_t) = dpnp_initval_c<_DataType> template DPCTLSyclEventRef (*dpnp_initval_ext_c)(DPCTLSyclQueueRef, - void*, - void*, - size_t, - const DPCTLEventVectorRef) = dpnp_initval_c<_DataType>; + void*, + void*, + size_t, + const DPCTLEventVectorRef) = dpnp_initval_c<_DataType>; template class dpnp_matmul_c_kernel; diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index a3bba3124051..e9864d3da8a9 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -116,7 +116,6 @@ gws, kernel_parallel_for_func); \ }; \ event = q.submit(kernel_func); \ - event.wait(); \ } \ else \ { \ @@ -142,9 +141,11 @@ { \ event = q.submit(kernel_func); \ } \ - event.wait(); \ } \ - return event_ref; \ + \ + event_ref = reinterpret_cast(&event); \ + \ + return DPCTLEvent_Copy(event_ref); \ } \ \ template \ @@ -643,9 +644,9 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap) } \ } \ \ - event.wait(); \ + event_ref = reinterpret_cast(&event); \ \ - return event_ref; \ + return DPCTLEvent_Copy(event_ref); \ } \ \ template \ @@ -941,6 +942,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) \ input1_it->~DPNPC_id(); \ input2_it->~DPNPC_id(); \ + \ + return event_ref; \ } \ else if (use_strides) \ { \ @@ -969,7 +972,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) }; \ \ event = q.submit(kernel_func); \ - event.wait(); \ } \ else \ { \ @@ -995,9 +997,10 @@ static void 
func_map_init_elemwise_1arg_1type(func_map_t& fmap) }; \ event = q.submit(kernel_func); \ } \ - event.wait(); \ } \ - return event_ref; \ + event_ref = reinterpret_cast(&event); \ + \ + return DPCTLEvent_Copy(event_ref); \ } \ \ template \ diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp index 06abddf88532..bf69fce8e416 100644 --- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp @@ -482,7 +482,8 @@ DPCTLSyclEventRef dpnp_ediff1d_c(DPCTLSyclQueueRef q_ref, gws, kernel_parallel_for_func); }; event = q.submit(kernel_func); - event.wait(); + + event_ref = reinterpret_cast(&event); return DPCTLEvent_Copy(event_ref); } diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 1cc650bdaaa8..a4f4156a8ed6 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -25,6 +25,8 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** +cimport dpctl as c_dpctl + from libcpp cimport bool as cpp_bool from dpnp.dpnp_utils.dpnp_algo_utils cimport dpnp_descriptor @@ -34,166 +36,325 @@ from dpnp.dpnp_algo cimport shape_elem_type, shape_type_c cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this namespace for Enum import cdef enum DPNPFuncName "DPNPFuncName": DPNP_FN_ABSOLUTE + DPNP_FN_ABSOLUTE_EXT DPNP_FN_ADD + DPNP_FN_ADD_EXT DPNP_FN_ALL + DPNP_FN_ALL_EXT DPNP_FN_ALLCLOSE + DPNP_FN_ALLCLOSE_EXT DPNP_FN_ANY + DPNP_FN_ANY_EXT DPNP_FN_ARANGE + DPNP_FN_ARANGE_EXT DPNP_FN_ARCCOS + DPNP_FN_ARCCOS_EXT DPNP_FN_ARCCOSH + DPNP_FN_ARCCOSH_EXT DPNP_FN_ARCSIN + DPNP_FN_ARCSIN_EXT DPNP_FN_ARCSINH + DPNP_FN_ARCSINH_EXT DPNP_FN_ARCTAN + DPNP_FN_ARCTAN_EXT DPNP_FN_ARCTAN2 + DPNP_FN_ARCTAN2_EXT DPNP_FN_ARCTANH + DPNP_FN_ARCTANH_EXT DPNP_FN_ARGMAX + DPNP_FN_ARGMAX_EXT DPNP_FN_ARGMIN + DPNP_FN_ARGMIN_EXT DPNP_FN_ARGSORT + DPNP_FN_ARGSORT_EXT DPNP_FN_AROUND + 
DPNP_FN_AROUND_EXT DPNP_FN_ASTYPE + DPNP_FN_ASTYPE_EXT DPNP_FN_BITWISE_AND + DPNP_FN_BITWISE_AND_EXT DPNP_FN_BITWISE_OR + DPNP_FN_BITWISE_OR_EXT DPNP_FN_BITWISE_XOR + DPNP_FN_BITWISE_XOR_EXT DPNP_FN_CBRT + DPNP_FN_CBRT_EXT DPNP_FN_CEIL + DPNP_FN_CEIL_EXT DPNP_FN_CHOLESKY + DPNP_FN_CHOLESKY_EXT DPNP_FN_CHOOSE + DPNP_FN_CHOOSE_EXT DPNP_FN_CONJIGUATE + DPNP_FN_CONJIGUATE_EXT DPNP_FN_COPY + DPNP_FN_COPY_EXT DPNP_FN_COPYSIGN + DPNP_FN_COPYSIGN_EXT DPNP_FN_COPYTO + DPNP_FN_COPYTO_EXT DPNP_FN_CORRELATE + DPNP_FN_CORRELATE_EXT DPNP_FN_COS + DPNP_FN_COS_EXT DPNP_FN_COSH + DPNP_FN_COSH_EXT DPNP_FN_COV + DPNP_FN_COV_EXT DPNP_FN_COUNT_NONZERO + DPNP_FN_COUNT_NONZERO_EXT DPNP_FN_CROSS + DPNP_FN_CROSS_EXT DPNP_FN_CUMPROD + DPNP_FN_CUMPROD_EXT DPNP_FN_CUMSUM + DPNP_FN_CUMSUM_EXT DPNP_FN_DEGREES + DPNP_FN_DEGREES_EXT DPNP_FN_DET + DPNP_FN_DET_EXT DPNP_FN_DIAG + DPNP_FN_DIAG_EXT DPNP_FN_DIAG_INDICES + DPNP_FN_DIAG_INDICES_EXT DPNP_FN_DIAGONAL + DPNP_FN_DIAGONAL_EXT DPNP_FN_DIVIDE + DPNP_FN_DIVIDE_EXT DPNP_FN_DOT + DPNP_FN_DOT_EXT DPNP_FN_EDIFF1D + DPNP_FN_EDIFF1D_EXT DPNP_FN_EIG + DPNP_FN_EIG_EXT DPNP_FN_EIGVALS + DPNP_FN_EIGVALS_EXT DPNP_FN_ERF + DPNP_FN_ERF_EXT DPNP_FN_EYE + DPNP_FN_EYE_EXT DPNP_FN_EXP + DPNP_FN_EXP_EXT DPNP_FN_EXP2 + DPNP_FN_EXP2_EXT DPNP_FN_EXPM1 + DPNP_FN_EXPM1_EXT DPNP_FN_FABS + DPNP_FN_FABS_EXT DPNP_FN_FFT_FFT + DPNP_FN_FFT_FFT_EXT DPNP_FN_FILL_DIAGONAL + DPNP_FN_FILL_DIAGONAL_EXT DPNP_FN_FLATTEN + DPNP_FN_FLATTEN_EXT DPNP_FN_FLOOR + DPNP_FN_FLOOR_EXT DPNP_FN_FLOOR_DIVIDE + DPNP_FN_FLOOR_DIVIDE_EXT DPNP_FN_FMOD + DPNP_FN_FMOD_EXT DPNP_FN_FULL + DPNP_FN_FULL_EXT DPNP_FN_FULL_LIKE + DPNP_FN_FULL_LIKE_EXT DPNP_FN_HYPOT + DPNP_FN_HYPOT_EXT DPNP_FN_IDENTITY + DPNP_FN_IDENTITY_EXT DPNP_FN_INITVAL + DPNP_FN_INITVAL_EXT DPNP_FN_INV + DPNP_FN_INV_EXT DPNP_FN_INVERT + DPNP_FN_INVERT_EXT DPNP_FN_KRON + DPNP_FN_KRON_EXT DPNP_FN_LEFT_SHIFT + DPNP_FN_LEFT_SHIFT_EXT DPNP_FN_LOG + DPNP_FN_LOG_EXT DPNP_FN_LOG10 + DPNP_FN_LOG10_EXT DPNP_FN_LOG1P + DPNP_FN_LOG1P_EXT 
DPNP_FN_LOG2 + DPNP_FN_LOG2_EXT DPNP_FN_MATMUL DPNP_FN_MATMUL_EXT DPNP_FN_MATRIX_RANK + DPNP_FN_MATRIX_RANK_EXT DPNP_FN_MAX + DPNP_FN_MAX_EXT DPNP_FN_MAXIMUM + DPNP_FN_MAXIMUM_EXT DPNP_FN_MEAN + DPNP_FN_MEAN_EXT DPNP_FN_MEDIAN + DPNP_FN_MEDIAN_EXT DPNP_FN_MIN + DPNP_FN_MIN_EXT DPNP_FN_MINIMUM + DPNP_FN_MINIMUM_EXT DPNP_FN_MODF + DPNP_FN_MODF_EXT DPNP_FN_MULTIPLY + DPNP_FN_MULTIPLY_EXT DPNP_FN_NANVAR + DPNP_FN_NANVAR_EXT DPNP_FN_NEGATIVE + DPNP_FN_NEGATIVE_EXT DPNP_FN_NONZERO + DPNP_FN_NONZERO_EXT DPNP_FN_ONES + DPNP_FN_ONES_EXT DPNP_FN_ONES_LIKE + DPNP_FN_ONES_LIKE_EXT DPNP_FN_PARTITION + DPNP_FN_PARTITION_EXT DPNP_FN_PLACE + DPNP_FN_PLACE_EXT DPNP_FN_POWER + DPNP_FN_POWER_EXT DPNP_FN_PROD + DPNP_FN_PROD_EXT DPNP_FN_PTP + DPNP_FN_PTP_EXT DPNP_FN_PUT + DPNP_FN_PUT_EXT DPNP_FN_QR + DPNP_FN_QR_EXT DPNP_FN_RADIANS + DPNP_FN_RADIANS_EXT DPNP_FN_REMAINDER + DPNP_FN_REMAINDER_EXT DPNP_FN_RECIP + DPNP_FN_RECIP_EXT DPNP_FN_REPEAT + DPNP_FN_REPEAT_EXT DPNP_FN_RIGHT_SHIFT + DPNP_FN_RIGHT_SHIFT_EXT DPNP_FN_RNG_BETA + DPNP_FN_RNG_BETA_EXT DPNP_FN_RNG_BINOMIAL + DPNP_FN_RNG_BINOMIAL_EXT DPNP_FN_RNG_CHISQUARE + DPNP_FN_RNG_CHISQUARE_EXT DPNP_FN_RNG_EXPONENTIAL + DPNP_FN_RNG_EXPONENTIAL_EXT DPNP_FN_RNG_F + DPNP_FN_RNG_F_EXT DPNP_FN_RNG_GAMMA + DPNP_FN_RNG_GAMMA_EXT DPNP_FN_RNG_GAUSSIAN + DPNP_FN_RNG_GAUSSIAN_EXT DPNP_FN_RNG_GEOMETRIC + DPNP_FN_RNG_GEOMETRIC_EXT DPNP_FN_RNG_GUMBEL + DPNP_FN_RNG_GUMBEL_EXT DPNP_FN_RNG_HYPERGEOMETRIC + DPNP_FN_RNG_HYPERGEOMETRIC_EXT DPNP_FN_RNG_LAPLACE + DPNP_FN_RNG_LAPLACE_EXT DPNP_FN_RNG_LOGISTIC + DPNP_FN_RNG_LOGISTIC_EXT DPNP_FN_RNG_LOGNORMAL + DPNP_FN_RNG_LOGNORMAL_EXT DPNP_FN_RNG_MULTINOMIAL + DPNP_FN_RNG_MULTINOMIAL_EXT DPNP_FN_RNG_MULTIVARIATE_NORMAL + DPNP_FN_RNG_MULTIVARIATE_NORMAL_EXT DPNP_FN_RNG_NEGATIVE_BINOMIAL + DPNP_FN_RNG_NEGATIVE_BINOMIAL_EXT DPNP_FN_RNG_NONCENTRAL_CHISQUARE + DPNP_FN_RNG_NONCENTRAL_CHISQUARE_EXT DPNP_FN_RNG_NORMAL + DPNP_FN_RNG_NORMAL_EXT DPNP_FN_RNG_PARETO + DPNP_FN_RNG_PARETO_EXT DPNP_FN_RNG_POISSON + 
DPNP_FN_RNG_POISSON_EXT DPNP_FN_RNG_POWER + DPNP_FN_RNG_POWER_EXT DPNP_FN_PUT_ALONG_AXIS + DPNP_FN_PUT_ALONG_AXIS_EXT DPNP_FN_RNG_RAYLEIGH + DPNP_FN_RNG_RAYLEIGH_EXT DPNP_FN_RNG_SHUFFLE + DPNP_FN_RNG_SHUFFLE_EXT DPNP_FN_RNG_SRAND + DPNP_FN_RNG_SRAND_EXT DPNP_FN_RNG_STANDARD_CAUCHY + DPNP_FN_RNG_STANDARD_CAUCHY_EXT DPNP_FN_RNG_STANDARD_EXPONENTIAL + DPNP_FN_RNG_STANDARD_EXPONENTIAL_EXT DPNP_FN_RNG_STANDARD_GAMMA + DPNP_FN_RNG_STANDARD_GAMMA_EXT DPNP_FN_RNG_STANDARD_NORMAL + DPNP_FN_RNG_STANDARD_NORMAL_EXT DPNP_FN_RNG_STANDARD_T + DPNP_FN_RNG_STANDARD_T_EXT DPNP_FN_RNG_TRIANGULAR + DPNP_FN_RNG_TRIANGULAR_EXT DPNP_FN_RNG_UNIFORM + DPNP_FN_RNG_UNIFORM_EXT DPNP_FN_RNG_VONMISES + DPNP_FN_RNG_VONMISES_EXT DPNP_FN_RNG_WALD + DPNP_FN_RNG_WALD_EXT DPNP_FN_RNG_WEIBULL + DPNP_FN_RNG_WEIBULL_EXT DPNP_FN_RNG_ZIPF + DPNP_FN_RNG_ZIPF_EXT DPNP_FN_SEARCHSORTED + DPNP_FN_SEARCHSORTED_EXT DPNP_FN_SIGN + DPNP_FN_SIGN_EXT DPNP_FN_SIN + DPNP_FN_SIN_EXT DPNP_FN_SINH + DPNP_FN_SINH_EXT DPNP_FN_SORT + DPNP_FN_SORT_EXT DPNP_FN_SQRT + DPNP_FN_SQRT_EXT DPNP_FN_SQUARE + DPNP_FN_SQUARE_EXT DPNP_FN_STD + DPNP_FN_STD_EXT DPNP_FN_SUBTRACT + DPNP_FN_SUBTRACT_EXT DPNP_FN_SUM + DPNP_FN_SUM_EXT DPNP_FN_SVD + DPNP_FN_SVD_EXT DPNP_FN_TAKE + DPNP_FN_TAKE_EXT DPNP_FN_TAN + DPNP_FN_TAN_EXT DPNP_FN_TANH + DPNP_FN_TANH_EXT DPNP_FN_TRACE + DPNP_FN_TRACE_EXT DPNP_FN_TRANSPOSE + DPNP_FN_TRANSPOSE_EXT DPNP_FN_TRAPZ + DPNP_FN_TRAPZ_EXT DPNP_FN_TRI + DPNP_FN_TRI_EXT DPNP_FN_TRIL + DPNP_FN_TRIL_EXT DPNP_FN_TRIU + DPNP_FN_TRIU_EXT DPNP_FN_TRUNC + DPNP_FN_TRUNC_EXT DPNP_FN_VANDER + DPNP_FN_VANDER_EXT DPNP_FN_VAR + DPNP_FN_VAR_EXT DPNP_FN_ZEROS + DPNP_FN_ZEROS_EXT DPNP_FN_ZEROS_LIKE + DPNP_FN_ZEROS_LIKE_EXT cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncType": # need this namespace for Enum import cdef enum DPNPFuncType "DPNPFuncType": @@ -234,22 +395,48 @@ cdef extern from "dpnp_iface.hpp": # C function pointer to the C library template functions -ctypedef void(*fptr_1out_t)(void * , size_t) -ctypedef 
void(*fptr_1in_1out_t)(void *, void * , size_t) -ctypedef void(*fptr_1in_1out_strides_t)(void *, const size_t, const size_t, - const shape_elem_type * , const shape_elem_type * , - void *, const size_t, const size_t, - const shape_elem_type * , const shape_elem_type * , - const long * ) -ctypedef void(*fptr_2in_1out_t)(void * , const void * , const size_t, const shape_elem_type * , const size_t, - const void *, const size_t, const shape_elem_type * , const size_t, const long * ) -ctypedef void(*fptr_2in_1out_strides_t)(void *, const size_t, const size_t, - const shape_elem_type * , const shape_elem_type * , - void *, const size_t, const size_t, - const shape_elem_type * , const shape_elem_type * , - void *, const size_t, const size_t, - const shape_elem_type * , const shape_elem_type * , - const long * ) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_1out_t)(c_dpctl.DPCTLSyclQueueRef, + void * , size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_1in_1out_strides_t)(c_dpctl.DPCTLSyclQueueRef, + void *, const size_t, const size_t, + const shape_elem_type * , const shape_elem_type * , + void *, const size_t, const size_t, + const shape_elem_type * , const shape_elem_type * , + const long * , + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_t)(c_dpctl.DPCTLSyclQueueRef, + void * , + const void * , + const size_t, + const shape_elem_type * , + const size_t, + const void *, + const size_t, + const shape_elem_type * , + const size_t, + const long * , + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQueueRef, + void *, + const size_t, + const size_t, + const shape_elem_type * , + const shape_elem_type * , + void *, + const size_t, + const size_t, + const shape_elem_type * , + const 
shape_elem_type * , + void *, + const size_t, const size_t, + const shape_elem_type * , + const shape_elem_type * , + const long * , + const c_dpctl.DPCTLEventVectorRef) ctypedef void(*fptr_blas_gemm_2in_1out_t)(void *, void * , void * , size_t, size_t, size_t) ctypedef void(*dpnp_reduction_c_t)(void *, const void * , const shape_elem_type*, const size_t, const shape_elem_type*, const size_t, const void * , const long*) diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index f9445381c78c..3018776e596d 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -36,10 +36,10 @@ from libc.time cimport time, time_t from libcpp.vector cimport vector import dpnp import dpnp.config as config +import dpnp.dpnp_container as dpnp_container import dpnp.dpnp_utils as utils_py from dpnp.dpnp_array import dpnp_array -cimport dpctl as c_dpctl import dpctl cimport cpython @@ -74,14 +74,22 @@ include "dpnp_algo_statistics.pyx" include "dpnp_algo_trigonometric.pyx" -ctypedef void(*fptr_dpnp_arange_t)(size_t, size_t, void *, size_t) -ctypedef void(*fptr_dpnp_astype_t)(const void *, void * , const size_t) -ctypedef void(*fptr_dpnp_flatten_t)(void *, const size_t, const size_t, - const shape_elem_type * , const shape_elem_type * , - void *, const size_t, const size_t, - const shape_elem_type * , const shape_elem_type * , - const long * ) -ctypedef void(*fptr_dpnp_initval_t)(void *, void * , size_t) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_arange_t)(c_dpctl.DPCTLSyclQueueRef, + size_t, size_t, void *, size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_astype_t)(c_dpctl.DPCTLSyclQueueRef, + const void *, void * , const size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_flatten_t)(c_dpctl.DPCTLSyclQueueRef, + void *, const size_t, const size_t, + const shape_elem_type * , const shape_elem_type * , + void *, const size_t, const size_t, + const shape_elem_type 
* , const shape_elem_type * , + const long * , + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_initval_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , size_t, + const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_arange(start, stop, step, dtype): @@ -92,15 +100,27 @@ cpdef utils.dpnp_descriptor dpnp_arange(start, stop, step, dtype): cdef tuple obj_shape = utils._object_to_tuple(obj_len) cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARANGE, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARANGE_EXT, param1_type, param1_type) cdef utils.dpnp_descriptor result = utils.create_output_descriptor(obj_shape, kernel_data.return_type, None) # for i in range(result.size): # result[i] = start + i + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef fptr_dpnp_arange_t func = kernel_data.ptr - func(start, step, result.get_data(), result.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + start, + step, + result.get_data(), + result.size, + NULL) # dep_events_ref) + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -109,7 +129,7 @@ cpdef utils.dpnp_descriptor dpnp_astype(utils.dpnp_descriptor x1, dtype): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ASTYPE, param1_type, param2_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ASTYPE_EXT, param1_type, param2_type) x1_obj = x1.get_array() @@ -122,8 +142,16 @@ cpdef utils.dpnp_descriptor dpnp_astype(utils.dpnp_descriptor x1, dtype): usm_type=x1_obj.usm_type, sycl_queue=x1_obj.sycl_queue) + result_sycl_queue = 
result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef fptr_dpnp_astype_t func = kernel_data.ptr - func(x1.get_data(), result.get_data(), x1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -131,7 +159,7 @@ cpdef utils.dpnp_descriptor dpnp_astype(utils.dpnp_descriptor x1, dtype): cpdef utils.dpnp_descriptor dpnp_flatten(utils.dpnp_descriptor x1): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FLATTEN, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FLATTEN_EXT, param1_type, param1_type) cdef shape_type_c x1_shape = x1.shape cdef shape_type_c x1_strides = utils.strides_to_vector(x1.strides, x1_shape) @@ -147,20 +175,30 @@ cpdef utils.dpnp_descriptor dpnp_flatten(utils.dpnp_descriptor x1): usm_type=x1_obj.usm_type, sycl_queue=x1_obj.sycl_queue) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) cdef fptr_dpnp_flatten_t func = kernel_data.ptr - func(result.get_data(), - result.size, - result.ndim, - result_shape.data(), - result_strides.data(), - x1.get_data(), - x1.size, - x1.ndim, - x1_shape.data(), - x1_strides.data(), - NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + result.size, + result.ndim, + result_shape.data(), + result_strides.data(), + x1.get_data(), + x1.size, + x1.ndim, + x1_shape.data(), + x1_strides.data(), + NULL, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return 
result @@ -171,16 +209,29 @@ cpdef utils.dpnp_descriptor dpnp_init_val(shape, dtype, value): """ cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_INITVAL, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_INITVAL_EXT, param1_type, param1_type) cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(shape, dtype, None) + result_obj = result.get_array() + # TODO: find better way to pass single value with type conversion - cdef utils.dpnp_descriptor val_arr = utils_py.create_output_descriptor_py((1, ), dtype, None) + cdef utils.dpnp_descriptor val_arr = utils_py.create_output_descriptor_py((1, ), + dtype, + None, + device=result_obj.sycl_device, + usm_type=result_obj.usm_type, + sycl_queue=result_obj.sycl_queue) val_arr.get_pyobj()[0] = value + cdef c_dpctl.SyclQueue q = result_obj.sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef fptr_dpnp_initval_t func = kernel_data.ptr - func(result.get_data(), val_arr.get_data(), result.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), val_arr.get_data(), result.size, NULL) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -280,9 +331,17 @@ cdef utils.dpnp_descriptor call_fptr_1out(DPNPFuncName fptr_name, # Create result array with type given by FPTR data cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef fptr_1out_t func = kernel_data.ptr # Call FPTR function - func(result.get_data(), result.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), result.size, NULL) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + 
c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -320,9 +379,17 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out(DPNPFuncName fptr_name, result = out + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef fptr_1in_1out_t func = kernel_data.ptr - func(x1.get_data(), result.get_data(), x1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -365,21 +432,31 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name, result = out + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) """ Call FPTR function """ cdef fptr_1in_1out_strides_t func = kernel_data.ptr - func(result.get_data(), - result.size, - result.ndim, - result_shape.data(), - result_strides.data(), - x1.get_data(), - x1.size, - x1.ndim, - x1_shape.data(), - x1_strides.data(), - NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + result.size, + result.ndim, + result_shape.data(), + result_strides.data(), + x1.get_data(), + x1.size, + x1.ndim, + x1_shape.data(), + x1_strides.data(), + NULL, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -424,18 +501,26 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out(DPNPFuncName fptr_name, result = out + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + """ Call FPTR function """ cdef fptr_2in_1out_t func = kernel_data.ptr - func(result.get_data(), - x1_obj.get_data(), - x1_obj.size, - x1_shape.data(), - 
x1_shape.size(), - x2_obj.get_data(), - x2_obj.size, - x2_shape.data(), - x2_shape.size(), - NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + x1_obj.get_data(), + x1_obj.size, + x1_shape.data(), + x1_shape.size(), + x2_obj.get_data(), + x2_obj.size, + x2_shape.data(), + x2_shape.size(), + NULL, + NULL) # dep_events_ref) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -485,23 +570,33 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) + result_obj = result.get_array() + + cdef c_dpctl.SyclQueue q = result_obj.sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + """ Call FPTR function """ cdef fptr_2in_1out_strides_t func = kernel_data.ptr - func(result.get_data(), - result.size, - result.ndim, - result_shape.data(), - result_strides.data(), - x1_obj.get_data(), - x1_obj.size, - x1_obj.ndim, - x1_shape.data(), - x1_strides.data(), - x2_obj.get_data(), - x2_obj.size, - x2_obj.ndim, - x2_shape.data(), - x2_strides.data(), - NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + result.size, + result.ndim, + result_shape.data(), + result_strides.data(), + x1_obj.get_data(), + x1_obj.size, + x1_obj.ndim, + x1_shape.data(), + x1_strides.data(), + x2_obj.get_data(), + x2_obj.size, + x2_obj.ndim, + x2_shape.data(), + x2_strides.data(), + NULL, + NULL) # dep_events_ref) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx index 6217e31d2a9a..11912d2423cc 100644 --- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx @@ -58,47 +58,91 @@ __all__ += [ ] -ctypedef void(*custom_1in_1out_func_ptr_t)(void *, void * , const int , 
shape_elem_type * , shape_elem_type * , const size_t, const size_t) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, + void * , + const int , + shape_elem_type * , + shape_elem_type * , + const size_t, + const size_t, + const c_dpctl.DPCTLEventVectorRef) ctypedef void(*ftpr_custom_vander_1in_1out_t)(void * , void * , size_t, size_t, int) -ctypedef void(*custom_arraycreation_1in_1out_func_ptr_t)(void *, const size_t, const size_t, const shape_elem_type*, const shape_elem_type*, - void *, const size_t, const size_t, const shape_elem_type*, const shape_elem_type*, - const shape_elem_type *, const size_t) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_arraycreation_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, + const size_t, + const size_t, + const shape_elem_type*, + const shape_elem_type*, + void *, + const size_t, + const size_t, + const shape_elem_type*, + const shape_elem_type*, + const shape_elem_type *, + const size_t, + const c_dpctl.DPCTLEventVectorRef) ctypedef void(*custom_indexing_1out_func_ptr_t)(void * , const size_t , const size_t , const int) -ctypedef void(*fptr_dpnp_eye_t)(void *, int , const shape_elem_type * ) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_eye_t)(c_dpctl.DPCTLSyclQueueRef, + void *, int , const shape_elem_type * , + const c_dpctl.DPCTLEventVectorRef) ctypedef void(*fptr_dpnp_trace_t)(const void *, void * , const shape_elem_type * , const size_t) cpdef utils.dpnp_descriptor dpnp_copy(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_COPY, x1) + return call_fptr_1in_1out_strides(DPNP_FN_COPY_EXT, x1) cpdef utils.dpnp_descriptor dpnp_diag(utils.dpnp_descriptor v, int k): cdef shape_type_c input_shape = v.shape + cdef shape_type_c result_shape if v.ndim == 1: n = v.shape[0] + abs(k) - shape_result = (n, n) + result_shape = (n, n) else: n = min(v.shape[0], v.shape[0] + k, v.shape[1], v.shape[1] - k) if n < 0: n = 0 - shape_result = (n, ) + result_shape = 
(n, ) - result_obj = dpnp.zeros(shape_result, dtype=v.dtype) # TODO need to call dpnp_zero instead + v_obj = v.get_array() + + result_obj = dpnp_container.empty(result_shape, + dtype=v.dtype, + device=v_obj.sycl_device, + usm_type=v_obj.usm_type, + sycl_queue=v_obj.sycl_queue) cdef utils.dpnp_descriptor result = dpnp_descriptor(result_obj) cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(v.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAG, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAG_EXT, param1_type, param1_type) result_type = dpnp_DPNPFuncType_to_dtype(< size_t > kernel_data.return_type) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef custom_1in_1out_func_ptr_t func = kernel_data.ptr - cdef shape_type_c result_shape = result.shape - func(v.get_data(), result.get_data(), k, input_shape.data(), result_shape.data(), v.ndim, result.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + v.get_data(), + result.get_data(), + k, + input_shape.data(), + result_shape.data(), + v.ndim, + result.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -112,15 +156,23 @@ cpdef utils.dpnp_descriptor dpnp_eye(N, M=None, k=0, dtype=None): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EYE, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EYE_EXT, param1_type, param1_type) cdef utils.dpnp_descriptor result = utils.create_output_descriptor((N, M), kernel_data.return_type, None) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef fptr_dpnp_eye_t func = 
kernel_data.ptr cdef shape_type_c result_shape = result.shape - func(result.get_data(), k, result_shape.data()) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), k, result_shape.data(), NULL) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -130,20 +182,34 @@ cpdef utils.dpnp_descriptor dpnp_full(result_shape, value_in, result_dtype): cdef DPNPFuncType dtype_in = dpnp_dtype_to_DPNPFuncType(result_dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FULL, dtype_in, DPNP_FT_NONE) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FULL_EXT, dtype_in, DPNP_FT_NONE) + + # ceate result array with type given by FPTR data + cdef shape_type_c result_shape_c = utils._object_to_tuple(result_shape) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape_c, kernel_data.return_type, None) + + result_obj = result.get_array() # Create single-element input fill array with type given by FPTR data cdef shape_type_c shape_in = (1,) - cdef utils.dpnp_descriptor array_fill = utils.create_output_descriptor(shape_in, kernel_data.return_type, None) + cdef utils.dpnp_descriptor array_fill = utils.create_output_descriptor(shape_in, + kernel_data.return_type, + None, + device=result_obj.sycl_device, + usm_type=result_obj.usm_type, + sycl_queue=result_obj.sycl_queue) array_fill.get_pyobj()[0] = value_in - # ceate result array with type given by FPTR data - cdef shape_type_c result_shape_c = utils._object_to_tuple(result_shape) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape_c, kernel_data.return_type, None) + cdef c_dpctl.SyclQueue q = result_obj.sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_1in_1out_t func = kernel_data.ptr # Call FPTR function - func(array_fill.get_data(), result.get_data(), result.size) + + cdef c_dpctl.DPCTLSyclEventRef event_ref = 
func(q_ref, array_fill.get_data(), result.get_data(), result.size, NULL) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -153,20 +219,33 @@ cpdef utils.dpnp_descriptor dpnp_full_like(result_shape, value_in, result_dtype) cdef DPNPFuncType dtype_in = dpnp_dtype_to_DPNPFuncType(result_dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FULL_LIKE, dtype_in, DPNP_FT_NONE) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FULL_LIKE_EXT, dtype_in, DPNP_FT_NONE) + + # ceate result array with type given by FPTR data + cdef shape_type_c result_shape_c = utils._object_to_tuple(result_shape) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape_c, kernel_data.return_type, None) + + result_obj = result.get_array() # Create single-element input fill array with type given by FPTR data cdef shape_type_c shape_in = (1,) - cdef utils.dpnp_descriptor array_fill = utils.create_output_descriptor(shape_in, kernel_data.return_type, None) + cdef utils.dpnp_descriptor array_fill = utils.create_output_descriptor(shape_in, + kernel_data.return_type, + None, + device=result_obj.sycl_device, + usm_type=result_obj.usm_type, + sycl_queue=result_obj.sycl_queue) array_fill.get_pyobj()[0] = value_in - # ceate result array with type given by FPTR data - cdef shape_type_c result_shape_c = utils._object_to_tuple(result_shape) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape_c, kernel_data.return_type, None) + cdef c_dpctl.SyclQueue q = result_obj.sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_1in_1out_t func = kernel_data.ptr # Call FPTR function - func(array_fill.get_data(), result.get_data(), result.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array_fill.get_data(), result.get_data(), result.size, NULL) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + 
c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -202,13 +281,21 @@ cpdef utils.dpnp_descriptor dpnp_geomspace(start, stop, num, endpoint, dtype, ax cpdef utils.dpnp_descriptor dpnp_identity(n, result_dtype): cdef DPNPFuncType dtype_in = dpnp_dtype_to_DPNPFuncType(result_dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_IDENTITY, dtype_in, DPNP_FT_NONE) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_IDENTITY_EXT, dtype_in, DPNP_FT_NONE) cdef shape_type_c shape_in = (n, n) cdef utils.dpnp_descriptor result = utils.create_output_descriptor(shape_in, kernel_data.return_type, None) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef fptr_1out_t func = kernel_data.ptr - func(result.get_data(), n) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), n, NULL) + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -293,11 +380,11 @@ cpdef list dpnp_meshgrid(xi, copy, sparse, indexing): cpdef utils.dpnp_descriptor dpnp_ones(result_shape, result_dtype): - return call_fptr_1out(DPNP_FN_ONES, utils._object_to_tuple(result_shape), result_dtype) + return call_fptr_1out(DPNP_FN_ONES_EXT, utils._object_to_tuple(result_shape), result_dtype) cpdef utils.dpnp_descriptor dpnp_ones_like(result_shape, result_dtype): - return call_fptr_1out(DPNP_FN_ONES_LIKE, utils._object_to_tuple(result_shape), result_dtype) + return call_fptr_1out(DPNP_FN_ONES_LIKE_EXT, utils._object_to_tuple(result_shape), result_dtype) cpdef dpnp_ptp(utils.dpnp_descriptor arr, axis=None): @@ -330,11 +417,16 @@ cpdef dpnp_ptp(utils.dpnp_descriptor arr, axis=None): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PTP, param1_type, param1_type) + cdef DPNPFuncData kernel_data = 
get_dpnp_function_ptr(DPNP_FN_PTP_EXT, param1_type, param1_type) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, kernel_data.return_type, None) + arr_obj = arr.get_array() - cdef custom_arraycreation_1in_1out_func_ptr_t func = kernel_data.ptr + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, + kernel_data.return_type, + None, + device=arr_obj.sycl_device, + usm_type=arr_obj.usm_type, + sycl_queue=arr_obj.sycl_queue) cdef shape_type_c axis1 cdef Py_ssize_t axis_size = 0 @@ -351,8 +443,29 @@ cpdef dpnp_ptp(utils.dpnp_descriptor arr, axis=None): cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result.shape) cdef shape_type_c arr_strides = utils.strides_to_vector(arr.strides, arr.shape) - func(result.get_data(), result.size, result.ndim, output_shape.data(), result_strides.data(), - arr.get_data(), arr.size, arr.ndim, shape_arr.data(), arr_strides.data(), axis2.data(), axis_size) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + + cdef custom_arraycreation_1in_1out_func_ptr_t func = kernel_data.ptr + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + result.size, + result.ndim, + output_shape.data(), + result_strides.data(), + arr.get_data(), + arr.size, + arr.ndim, + shape_arr.data(), + arr_strides.data(), + axis2.data(), + axis_size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -414,13 +527,36 @@ cpdef utils.dpnp_descriptor dpnp_tril(utils.dpnp_descriptor m, int k): result_shape = m.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIL, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIL_EXT, param1_type, param1_type) + + 
m_obj = m.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=m_obj.sycl_device, + usm_type=m_obj.usm_type, + sycl_queue=m_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_1in_1out_func_ptr_t func = kernel_data.ptr - func(m.get_data(), result.get_data(), k, input_shape.data(), result_shape.data(), m.ndim, result.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + m.get_data(), + result.get_data(), + k, + input_shape.data(), + result_shape.data(), + m.ndim, + result.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -435,13 +571,36 @@ cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k): result_shape = m.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIU, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIU_EXT, param1_type, param1_type) + + m_obj = m.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=m_obj.sycl_device, + usm_type=m_obj.usm_type, + sycl_queue=m_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_1in_1out_func_ptr_t func = 
kernel_data.ptr - func(m.get_data(), result.get_data(), k, input_shape.data(), result_shape.data(), m.ndim, result.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + m.get_data(), + result.get_data(), + k, + input_shape.data(), + result_shape.data(), + m.ndim, + result.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -461,8 +620,8 @@ cpdef utils.dpnp_descriptor dpnp_vander(utils.dpnp_descriptor x1, int N, int inc cpdef utils.dpnp_descriptor dpnp_zeros(result_shape, result_dtype): - return call_fptr_1out(DPNP_FN_ZEROS, utils._object_to_tuple(result_shape), result_dtype) + return call_fptr_1out(DPNP_FN_ZEROS_EXT, utils._object_to_tuple(result_shape), result_dtype) cpdef utils.dpnp_descriptor dpnp_zeros_like(result_shape, result_dtype): - return call_fptr_1out(DPNP_FN_ZEROS_LIKE, utils._object_to_tuple(result_shape), result_dtype) + return call_fptr_1out(DPNP_FN_ZEROS_LIKE_EXT, utils._object_to_tuple(result_shape), result_dtype) diff --git a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx index b09304033098..482f00c2c71d 100644 --- a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx @@ -49,7 +49,7 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_and(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_AND, x1_obj, x2_obj, dtype=dtype, out=out, where=where) + return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_AND_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where) cpdef utils.dpnp_descriptor dpnp_bitwise_or(utils.dpnp_descriptor x1_obj, @@ -57,7 +57,7 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_or(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_OR, x1_obj, x2_obj, dtype=dtype, out=out, where=where) + return 
call_fptr_2in_1out_strides(DPNP_FN_BITWISE_OR_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where) cpdef utils.dpnp_descriptor dpnp_bitwise_xor(utils.dpnp_descriptor x1_obj, @@ -65,11 +65,11 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_xor(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_XOR, x1_obj, x2_obj, dtype=dtype, out=out, where=where) + return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_XOR_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where) cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr): - return call_fptr_1in_1out(DPNP_FN_INVERT, arr, arr.shape) + return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape) cpdef utils.dpnp_descriptor dpnp_left_shift(utils.dpnp_descriptor x1_obj, @@ -77,11 +77,11 @@ cpdef utils.dpnp_descriptor dpnp_left_shift(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_LEFT_SHIFT, x1_obj, x2_obj, dtype=dtype, out=out, where=where) + return call_fptr_2in_1out_strides(DPNP_FN_LEFT_SHIFT_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where) cpdef utils.dpnp_descriptor dpnp_right_shift(utils.dpnp_descriptor x1_obj, utils.dpnp_descriptor x2_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_RIGHT_SHIFT, x1_obj, x2_obj, dtype=dtype, out=out, where=where) + return call_fptr_2in_1out_strides(DPNP_FN_RIGHT_SHIFT_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where) diff --git a/dpnp/dpnp_algo/dpnp_algo_counting.pyx b/dpnp/dpnp_algo/dpnp_algo_counting.pyx index fa0f1e53b218..119c0d27b692 100644 --- a/dpnp/dpnp_algo/dpnp_algo_counting.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_counting.pyx @@ -40,4 +40,4 @@ __all__ += [ cpdef utils.dpnp_descriptor dpnp_count_nonzero(utils.dpnp_descriptor x1): - return call_fptr_1in_1out(DPNP_FN_COUNT_NONZERO, x1, (1,)) + 
return call_fptr_1in_1out(DPNP_FN_COUNT_NONZERO_EXT, x1, (1,)) diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx index 0303d5a3498a..f035761f12a2 100644 --- a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx @@ -99,21 +99,31 @@ cpdef dpnp_copyto(utils.dpnp_descriptor dst, utils.dpnp_descriptor src, where=Tr cdef shape_type_c src_strides = utils.strides_to_vector(src.strides, src_shape) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COPYTO, src_type, dst_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COPYTO_EXT, src_type, dst_type) + + _, _, result_sycl_queue = utils.get_common_usm_allocation(dst, src) + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() # Call FPTR function cdef fptr_1in_1out_strides_t func = kernel_data.ptr - func(dst.get_data(), - dst.size, - dst.ndim, - dst_shape.data(), - dst_strides.data(), - src.get_data(), - src.size, - src.ndim, - src_shape.data(), - src_strides.data(), - NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + dst.get_data(), + dst.size, + dst.ndim, + dst_shape.data(), + dst_strides.data(), + src.get_data(), + src.size, + src.ndim, + src_shape.data(), + src_strides.data(), + NULL, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) cpdef utils.dpnp_descriptor dpnp_expand_dims(utils.dpnp_descriptor in_array, axis): diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx index 2e52c32c10a2..b2c070d95b47 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx @@ -105,7 +105,7 @@ cpdef utils.dpnp_descriptor dpnp_add(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return 
call_fptr_2in_1out_strides(DPNP_FN_ADD, x1_obj, x2_obj, dtype, out, where) + return call_fptr_2in_1out_strides(DPNP_FN_ADD_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_arctan2(utils.dpnp_descriptor x1_obj, @@ -113,7 +113,7 @@ cpdef utils.dpnp_descriptor dpnp_arctan2(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_ARCTAN2, x1_obj, x2_obj, dtype, out, where, func_name="arctan2") + return call_fptr_2in_1out_strides(DPNP_FN_ARCTAN2_EXT, x1_obj, x2_obj, dtype, out, where, func_name="arctan2") cpdef utils.dpnp_descriptor dpnp_around(utils.dpnp_descriptor x1, int decimals): @@ -134,11 +134,11 @@ cpdef utils.dpnp_descriptor dpnp_around(utils.dpnp_descriptor x1, int decimals): cpdef utils.dpnp_descriptor dpnp_ceil(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return call_fptr_1in_1out_strides(DPNP_FN_CEIL, x1, dtype=None, out=out, where=True, func_name='ceil') + return call_fptr_1in_1out_strides(DPNP_FN_CEIL_EXT, x1, dtype=None, out=out, where=True, func_name='ceil') cpdef utils.dpnp_descriptor dpnp_conjugate(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_CONJIGUATE, x1) + return call_fptr_1in_1out_strides(DPNP_FN_CONJIGUATE_EXT, x1) cpdef utils.dpnp_descriptor dpnp_copysign(utils.dpnp_descriptor x1_obj, @@ -146,7 +146,7 @@ cpdef utils.dpnp_descriptor dpnp_copysign(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_COPYSIGN, x1_obj, x2_obj, dtype, out, where) + return call_fptr_2in_1out_strides(DPNP_FN_COPYSIGN_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_cross(utils.dpnp_descriptor x1_obj, @@ -154,7 +154,7 @@ cpdef utils.dpnp_descriptor dpnp_cross(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out(DPNP_FN_CROSS, x1_obj, 
x2_obj, dtype, out, where) + return call_fptr_2in_1out(DPNP_FN_CROSS_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_cumprod(utils.dpnp_descriptor x1): @@ -166,7 +166,7 @@ cpdef utils.dpnp_descriptor dpnp_cumprod(utils.dpnp_descriptor x1): # >>> res.shape # (4,) - return call_fptr_1in_1out(DPNP_FN_CUMPROD, x1, (x1.size,)) + return call_fptr_1in_1out(DPNP_FN_CUMPROD_EXT, x1, (x1.size,)) cpdef utils.dpnp_descriptor dpnp_cumsum(utils.dpnp_descriptor x1): @@ -178,7 +178,7 @@ cpdef utils.dpnp_descriptor dpnp_cumsum(utils.dpnp_descriptor x1): # >>> res.shape # (4,) - return call_fptr_1in_1out(DPNP_FN_CUMSUM, x1, (x1.size,)) + return call_fptr_1in_1out(DPNP_FN_CUMSUM_EXT, x1, (x1.size,)) cpdef utils.dpnp_descriptor dpnp_diff(utils.dpnp_descriptor x1, int n): @@ -203,7 +203,7 @@ cpdef utils.dpnp_descriptor dpnp_divide(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_DIVIDE, x1_obj, x2_obj, dtype, out, where) + return call_fptr_2in_1out_strides(DPNP_FN_DIVIDE_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1): @@ -215,7 +215,7 @@ cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EDIFF1D, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EDIFF1D_EXT, param1_type, param1_type) result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type) @@ -223,33 +223,51 @@ cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1): cdef shape_type_c x1_shape = (x1.size,) cdef shape_type_c x1_strides = utils.strides_to_vector(None, x1_shape) + x1_obj = x1.get_array() + cdef shape_type_c result_shape = (x1.size - 1,) - cdef utils.dpnp_descriptor result = 
utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + # Call FPTR function cdef fptr_1in_1out_strides_t func = kernel_data.ptr - func(result.get_data(), - result.size, - result.ndim, - result_shape.data(), - result_strides.data(), - x1.get_data(), - x1.size, - x1.ndim, - x1_shape.data(), - x1_strides.data(), - NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + result.size, + result.ndim, + result_shape.data(), + result_strides.data(), + x1.get_data(), + x1.size, + x1.ndim, + x1_shape.data(), + x1_strides.data(), + NULL, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result cpdef utils.dpnp_descriptor dpnp_fabs(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_FABS, x1) + return call_fptr_1in_1out_strides(DPNP_FN_FABS_EXT, x1) cpdef utils.dpnp_descriptor dpnp_floor(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return call_fptr_1in_1out_strides(DPNP_FN_FLOOR, x1, dtype=None, out=out, where=True, func_name='floor') + return call_fptr_1in_1out_strides(DPNP_FN_FLOOR_EXT, x1, dtype=None, out=out, where=True, func_name='floor') cpdef utils.dpnp_descriptor dpnp_floor_divide(utils.dpnp_descriptor x1_obj, @@ -257,7 +275,7 @@ cpdef utils.dpnp_descriptor dpnp_floor_divide(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out(DPNP_FN_FLOOR_DIVIDE, x1_obj, x2_obj, dtype, out, where) + return 
call_fptr_2in_1out(DPNP_FN_FLOOR_DIVIDE_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_fmod(utils.dpnp_descriptor x1_obj, @@ -265,7 +283,7 @@ cpdef utils.dpnp_descriptor dpnp_fmod(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_FMOD, x1_obj, x2_obj, dtype, out, where) + return call_fptr_2in_1out_strides(DPNP_FN_FMOD_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_gradient(utils.dpnp_descriptor y1, int dx=1): @@ -296,7 +314,7 @@ cpdef utils.dpnp_descriptor dpnp_hypot(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_HYPOT, x1_obj, x2_obj, dtype, out, where) + return call_fptr_2in_1out_strides(DPNP_FN_HYPOT_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_maximum(utils.dpnp_descriptor x1_obj, @@ -304,7 +322,7 @@ cpdef utils.dpnp_descriptor dpnp_maximum(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_MAXIMUM, x1_obj, x2_obj, dtype, out, where) + return call_fptr_2in_1out_strides(DPNP_FN_MAXIMUM_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_minimum(utils.dpnp_descriptor x1_obj, @@ -312,7 +330,7 @@ cpdef utils.dpnp_descriptor dpnp_minimum(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_MINIMUM, x1_obj, x2_obj, dtype, out, where) + return call_fptr_2in_1out_strides(DPNP_FN_MINIMUM_EXT, x1_obj, x2_obj, dtype, out, where) cpdef tuple dpnp_modf(utils.dpnp_descriptor x1): @@ -339,7 +357,7 @@ cpdef utils.dpnp_descriptor dpnp_multiply(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_MULTIPLY, x1_obj, 
x2_obj, dtype, out, where) + return call_fptr_2in_1out_strides(DPNP_FN_MULTIPLY_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_nancumprod(utils.dpnp_descriptor x1): @@ -397,7 +415,7 @@ cpdef utils.dpnp_descriptor dpnp_nansum(utils.dpnp_descriptor x1): cpdef utils.dpnp_descriptor dpnp_negative(dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_NEGATIVE, x1) + return call_fptr_1in_1out_strides(DPNP_FN_NEGATIVE_EXT, x1) cpdef utils.dpnp_descriptor dpnp_power(utils.dpnp_descriptor x1_obj, @@ -405,7 +423,7 @@ cpdef utils.dpnp_descriptor dpnp_power(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_POWER, x1_obj, x2_obj, dtype, out, where, func_name="power") + return call_fptr_2in_1out_strides(DPNP_FN_POWER_EXT, x1_obj, x2_obj, dtype, out, where, func_name="power") cpdef utils.dpnp_descriptor dpnp_prod(utils.dpnp_descriptor input, @@ -452,11 +470,11 @@ cpdef utils.dpnp_descriptor dpnp_remainder(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out(DPNP_FN_REMAINDER, x1_obj, x2_obj, dtype, out, where) + return call_fptr_2in_1out(DPNP_FN_REMAINDER_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_sign(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_SIGN, x1) + return call_fptr_1in_1out_strides(DPNP_FN_SIGN_EXT, x1) cpdef utils.dpnp_descriptor dpnp_subtract(utils.dpnp_descriptor x1_obj, @@ -464,7 +482,7 @@ cpdef utils.dpnp_descriptor dpnp_subtract(utils.dpnp_descriptor x1_obj, object dtype=None, utils.dpnp_descriptor out=None, object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_SUBTRACT, x1_obj, x2_obj, dtype, out, where) + return call_fptr_2in_1out_strides(DPNP_FN_SUBTRACT_EXT, x1_obj, x2_obj, dtype, out, where) cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor input, @@ -514,4 +532,4 @@ cpdef 
utils.dpnp_descriptor dpnp_trapz(utils.dpnp_descriptor y1, utils.dpnp_desc cpdef utils.dpnp_descriptor dpnp_trunc(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return call_fptr_1in_1out_strides(DPNP_FN_TRUNC, x1, dtype=None, out=out, where=True, func_name='trunc') + return call_fptr_1in_1out_strides(DPNP_FN_TRUNC_EXT, x1, dtype=None, out=out, where=True, func_name='trunc') diff --git a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx b/dpnp/dpnp_algo/dpnp_algo_sorting.pyx index 2fbb37d52df7..93a76e81a18b 100644 --- a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_sorting.pyx @@ -50,7 +50,7 @@ cpdef utils.dpnp_descriptor dpnp_argsort(utils.dpnp_descriptor x1): cdef shape_type_c result_shape = x1.shape if result_shape == (): result_shape = (1,) - return call_fptr_1in_1out(DPNP_FN_ARGSORT, x1, result_shape) + return call_fptr_1in_1out(DPNP_FN_ARGSORT_EXT, x1, result_shape) cpdef utils.dpnp_descriptor dpnp_partition(utils.dpnp_descriptor arr, int kth, axis=-1, kind='introselect', order=None): @@ -92,4 +92,4 @@ cpdef utils.dpnp_descriptor dpnp_searchsorted(utils.dpnp_descriptor arr, utils.d cpdef utils.dpnp_descriptor dpnp_sort(utils.dpnp_descriptor x1): - return call_fptr_1in_1out(DPNP_FN_SORT, x1, x1.shape) + return call_fptr_1in_1out(DPNP_FN_SORT_EXT, x1, x1.shape) diff --git a/dpnp/dpnp_algo/dpnp_algo_special.pyx b/dpnp/dpnp_algo/dpnp_algo_special.pyx index a4a06613efb7..fb6ff0d74cc6 100644 --- a/dpnp/dpnp_algo/dpnp_algo_special.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_special.pyx @@ -40,4 +40,4 @@ __all__ += [ cpdef utils.dpnp_descriptor dpnp_erf(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_ERF, x1) + return call_fptr_1in_1out_strides(DPNP_FN_ERF_EXT, x1) diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx index 596b9c4e853a..5ff4c785d96f 100644 --- a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx @@ -103,16 +103,39 @@ cpdef utils.dpnp_descriptor 
dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_ cdef shape_type_c x1_shape = x1.shape cdef shape_type_c x2_shape = x2.shape - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CORRELATE, param1_type, param2_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CORRELATE_EXT, param1_type, param2_type) + + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1, x2) # ceate result array with type given by FPTR data cdef shape_type_c result_shape = (1,) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_2in_1out_t func = kernel_data.ptr - func(result.get_data(), x1.get_data(), x1.size, x1_shape.data(), x1_shape.size(), - x2.get_data(), x2.size, x2_shape.data(), x2_shape.size(), NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + x1.get_data(), + x1.size, + x1_shape.data(), + x1_shape.size(), + x2.get_data(), + x2.size, + x2_shape.data(), + x2_shape.size(), + NULL, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx index 89644fc6d8d9..6edfb87042fb 100644 --- a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx @@ -65,103 +65,103 @@ __all__ += [ cpdef utils.dpnp_descriptor dpnp_arccos(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_ARCCOS, x1) + return call_fptr_1in_1out_strides(DPNP_FN_ARCCOS_EXT, x1) cpdef utils.dpnp_descriptor 
dpnp_arccosh(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_ARCCOSH, x1) + return call_fptr_1in_1out_strides(DPNP_FN_ARCCOSH_EXT, x1) cpdef utils.dpnp_descriptor dpnp_arcsin(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return call_fptr_1in_1out_strides(DPNP_FN_ARCSIN, x1, dtype=None, out=out, where=True, func_name='arcsin') + return call_fptr_1in_1out_strides(DPNP_FN_ARCSIN_EXT, x1, dtype=None, out=out, where=True, func_name='arcsin') cpdef utils.dpnp_descriptor dpnp_arcsinh(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_ARCSINH, x1) + return call_fptr_1in_1out_strides(DPNP_FN_ARCSINH_EXT, x1) cpdef utils.dpnp_descriptor dpnp_arctan(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return call_fptr_1in_1out_strides(DPNP_FN_ARCTAN, x1, dtype=None, out=out, where=True, func_name='arctan') + return call_fptr_1in_1out_strides(DPNP_FN_ARCTAN_EXT, x1, dtype=None, out=out, where=True, func_name='arctan') cpdef utils.dpnp_descriptor dpnp_arctanh(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_ARCTANH, x1) + return call_fptr_1in_1out_strides(DPNP_FN_ARCTANH_EXT, x1) cpdef utils.dpnp_descriptor dpnp_cbrt(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_CBRT, x1) + return call_fptr_1in_1out_strides(DPNP_FN_CBRT_EXT, x1) cpdef utils.dpnp_descriptor dpnp_cos(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return call_fptr_1in_1out_strides(DPNP_FN_COS, x1, dtype=None, out=out, where=True, func_name='cos') + return call_fptr_1in_1out_strides(DPNP_FN_COS_EXT, x1, dtype=None, out=out, where=True, func_name='cos') cpdef utils.dpnp_descriptor dpnp_cosh(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_COSH, x1) + return call_fptr_1in_1out_strides(DPNP_FN_COSH_EXT, x1) cpdef utils.dpnp_descriptor dpnp_degrees(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_DEGREES, x1) + return call_fptr_1in_1out_strides(DPNP_FN_DEGREES_EXT, 
x1) cpdef utils.dpnp_descriptor dpnp_exp(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return call_fptr_1in_1out_strides(DPNP_FN_EXP, x1, dtype=None, out=out, where=True, func_name='exp') + return call_fptr_1in_1out_strides(DPNP_FN_EXP_EXT, x1, dtype=None, out=out, where=True, func_name='exp') cpdef utils.dpnp_descriptor dpnp_exp2(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_EXP2, x1) + return call_fptr_1in_1out_strides(DPNP_FN_EXP2_EXT, x1) cpdef utils.dpnp_descriptor dpnp_expm1(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_EXPM1, x1) + return call_fptr_1in_1out_strides(DPNP_FN_EXPM1_EXT, x1) cpdef utils.dpnp_descriptor dpnp_log(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return call_fptr_1in_1out_strides(DPNP_FN_LOG, x1, dtype=None, out=out, where=True, func_name='log') + return call_fptr_1in_1out_strides(DPNP_FN_LOG_EXT, x1, dtype=None, out=out, where=True, func_name='log') cpdef utils.dpnp_descriptor dpnp_log10(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_LOG10, x1) + return call_fptr_1in_1out_strides(DPNP_FN_LOG10_EXT, x1) cpdef utils.dpnp_descriptor dpnp_log1p(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_LOG1P, x1) + return call_fptr_1in_1out_strides(DPNP_FN_LOG1P_EXT, x1) cpdef utils.dpnp_descriptor dpnp_log2(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_LOG2, x1) + return call_fptr_1in_1out_strides(DPNP_FN_LOG2_EXT, x1) cpdef utils.dpnp_descriptor dpnp_recip(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_RECIP, x1) + return call_fptr_1in_1out_strides(DPNP_FN_RECIP_EXT, x1) cpdef utils.dpnp_descriptor dpnp_radians(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_RADIANS, x1) + return call_fptr_1in_1out_strides(DPNP_FN_RADIANS_EXT, x1) cpdef utils.dpnp_descriptor dpnp_sin(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return 
call_fptr_1in_1out_strides(DPNP_FN_SIN, x1, dtype=None, out=out, where=True, func_name='sin') + return call_fptr_1in_1out_strides(DPNP_FN_SIN_EXT, x1, dtype=None, out=out, where=True, func_name='sin') cpdef utils.dpnp_descriptor dpnp_sinh(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_SINH, x1) + return call_fptr_1in_1out_strides(DPNP_FN_SINH_EXT, x1) cpdef utils.dpnp_descriptor dpnp_sqrt(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_SQRT, x1) + return call_fptr_1in_1out_strides(DPNP_FN_SQRT_EXT, x1) cpdef utils.dpnp_descriptor dpnp_square(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_SQUARE, x1) + return call_fptr_1in_1out_strides(DPNP_FN_SQUARE_EXT, x1) cpdef utils.dpnp_descriptor dpnp_tan(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): - return call_fptr_1in_1out_strides(DPNP_FN_TAN, x1, dtype=None, out=out, where=True, func_name='tan') + return call_fptr_1in_1out_strides(DPNP_FN_TAN_EXT, x1, dtype=None, out=out, where=True, func_name='tan') cpdef utils.dpnp_descriptor dpnp_tanh(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_TANH, x1) + return call_fptr_1in_1out_strides(DPNP_FN_TANH_EXT, x1) cpdef utils.dpnp_descriptor dpnp_unwrap(utils.dpnp_descriptor array1): From d7783af49ebaab2588be3d6e683fcea0f38bef40 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Fri, 4 Feb 2022 04:42:29 -0600 Subject: [PATCH 02/18] Extand main cython funcs with queues and events (part 2) --- dpnp/backend/include/dpnp_iface.hpp | 4 +- .../kernels/dpnp_krnl_arraycreation.cpp | 15 +- dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 9 +- dpnp/backend/kernels/dpnp_krnl_linalg.cpp | 4 +- dpnp/backend/kernels/dpnp_krnl_logic.cpp | 12 +- .../kernels/dpnp_krnl_manipulation.cpp | 4 +- .../kernels/dpnp_krnl_mathematical.cpp | 12 +- dpnp/backend/kernels/dpnp_krnl_statistics.cpp | 8 +- dpnp/dpnp_algo/dpnp_algo.pxd | 11 +- dpnp/dpnp_algo/dpnp_algo.pyx | 18 +- 
dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx | 99 ++++- dpnp/dpnp_algo/dpnp_algo_indexing.pyx | 355 ++++++++++++++---- dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx | 115 ++++-- dpnp/dpnp_algo/dpnp_algo_logic.pyx | 189 ++++++++-- dpnp/dpnp_algo/dpnp_algo_manipulation.pyx | 114 ++++-- dpnp/dpnp_algo/dpnp_algo_mathematical.pyx | 256 ++++++++++--- dpnp/dpnp_algo/dpnp_algo_searching.pyx | 54 ++- dpnp/dpnp_algo/dpnp_algo_sorting.pyx | 74 +++- dpnp/dpnp_algo/dpnp_algo_statistics.pyx | 335 ++++++++++++----- dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx | 9 +- 20 files changed, 1325 insertions(+), 372 deletions(-) diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 5b02d087b340..42c05f0fd61d 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -533,7 +533,7 @@ INP_DLLEXPORT void dpnp_cumprod_c(void* array1_in, void* result1, size_t size); * @param [in] q_ref Reference to SYCL queue. * @param [in] array1_in Input array. * @param [out] result1 Output array. - * @param [in] size Number of elements in input arrays. + * @param [in] size Number of elements in input arrays. * @param [in] dep_event_vec_ref Reference to vector of SYCL events. * */ @@ -551,7 +551,7 @@ INP_DLLEXPORT void dpnp_cumsum_c(void* array1_in, void* result1, size_t size); * @ingroup BACKEND_API * @brief The differences between consecutive elements of an array. * - * @param [in] q_ref Reference to SYCL queue. + * @param [in] q_ref Reference to SYCL queue. * @param [out] result_out Output array. * @param [in] result_size Size of output array. * @param [in] result_ndim Number of output array dimensions. 
diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp index 9f1b742c84e1..106036156739 100644 --- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp @@ -617,9 +617,6 @@ DPCTLSyclEventRef dpnp_vander_c(DPCTLSyclQueueRef q_ref, const int increasing, const DPCTLEventVectorRef dep_event_vec_ref) { - // avoid warning unused variable - (void)dep_event_vec_ref; - DPCTLSyclEventRef event_ref = nullptr; if ((array1_in == nullptr) || (result1 == nullptr)) @@ -637,8 +634,7 @@ DPCTLSyclEventRef dpnp_vander_c(DPCTLSyclQueueRef q_ref, if (N == 1) { - dpnp_ones_c<_DataType_output>(result, size_in); - return event_ref; + return dpnp_ones_c<_DataType_output>(q_ref, result, size_in, dep_event_vec_ref); } if (increasing) @@ -762,9 +758,10 @@ DPCTLSyclEventRef dpnp_trace_c(DPCTLSyclQueueRef q_ref, }; auto event = q.submit(kernel_func); - event.wait(); - return event_ref; + event_ref = reinterpret_cast(&event); + + return DPCTLEvent_Copy(event_ref); } template @@ -849,9 +846,9 @@ DPCTLSyclEventRef dpnp_tri_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index e14e4d1c8b32..66da163c86b5 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -87,9 +87,10 @@ DPCTLSyclEventRef dpnp_choose_c(DPCTLSyclQueueRef q_ref, }; sycl::event event = q.submit(kernel_func); - event.wait(); - return event_ref; + event_ref = reinterpret_cast(&event); + + return DPCTLEvent_Copy(event_ref); } template @@ -917,9 +918,9 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, sycl::event event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return 
DPCTLEvent_Copy(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp index fcc2a908343e..a56b4f751649 100644 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp @@ -502,9 +502,9 @@ DPCTLSyclEventRef dpnp_kron_c(DPCTLSyclQueueRef q_ref, sycl::event event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index cb323734aebf..109246913589 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -81,9 +81,9 @@ DPCTLSyclEventRef dpnp_all_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template @@ -166,9 +166,9 @@ DPCTLSyclEventRef dpnp_allclose_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template @@ -258,9 +258,9 @@ DPCTLSyclEventRef dpnp_any_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp b/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp index c87ab1bf12e7..8a122dbf7283 100644 --- a/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp @@ -80,9 +80,9 @@ DPCTLSyclEventRef dpnp_repeat_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp 
b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp index bf69fce8e416..dc2493094a26 100644 --- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp @@ -84,9 +84,9 @@ DPCTLSyclEventRef dpnp_around_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); } - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template @@ -170,9 +170,9 @@ DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); } - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template @@ -753,9 +753,9 @@ DPCTLSyclEventRef dpnp_modf_c(DPCTLSyclQueueRef q_ref, event = q.submit(kernel_func); } - event.wait(); + event_ref = reinterpret_cast(&event); - return event_ref; + return DPCTLEvent_Copy(event_ref); } template diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp index 70512c17f5f5..3e693473ca01 100644 --- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp @@ -598,6 +598,10 @@ DPCTLSyclEventRef dpnp_mean_c(DPCTLSyclQueueRef q_ref, sycl::event event = mkl_stats::mean(q, dataset, result); event.wait(); + + event_ref = reinterpret_cast(&event); + + return DPCTLEvent_Copy(event_ref); } else { @@ -608,9 +612,9 @@ DPCTLSyclEventRef dpnp_mean_c(DPCTLSyclQueueRef q_ref, result[0] = sum[0] / static_cast<_ResultType>(size); sycl::free(sum, q); - } - return event_ref; + return event_ref; + } } template diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index a4f4156a8ed6..3360164830d9 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -438,7 +438,16 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQu const long * , const c_dpctl.DPCTLEventVectorRef) ctypedef void(*fptr_blas_gemm_2in_1out_t)(void *, void * , 
void * , size_t, size_t, size_t) -ctypedef void(*dpnp_reduction_c_t)(void *, const void * , const shape_elem_type*, const size_t, const shape_elem_type*, const size_t, const void * , const long*) +ctypedef c_dpctl.DPCTLSyclEventRef(*dpnp_reduction_c_t)(c_dpctl.DPCTLSyclQueueRef, + void *, + const void * , + const shape_elem_type*, + const size_t, + const shape_elem_type*, + const size_t, + const void * , + const long*, + const c_dpctl.DPCTLEventVectorRef) cpdef dpnp_descriptor dpnp_astype(dpnp_descriptor x1, dtype) cpdef dpnp_descriptor dpnp_flatten(dpnp_descriptor x1) diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 3018776e596d..5f30dbffbd95 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -119,7 +119,7 @@ cpdef utils.dpnp_descriptor dpnp_arange(start, stop, step, dtype): result.get_data(), result.size, NULL) # dep_events_ref) - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -150,7 +150,7 @@ cpdef utils.dpnp_descriptor dpnp_astype(utils.dpnp_descriptor x1, dtype): cdef fptr_dpnp_astype_t func = kernel_data.ptr cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL) - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -197,7 +197,7 @@ cpdef utils.dpnp_descriptor dpnp_flatten(utils.dpnp_descriptor x1): NULL, NULL) # dep_events_ref - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -230,7 +230,7 @@ cpdef utils.dpnp_descriptor dpnp_init_val(shape, dtype, value): cdef fptr_dpnp_initval_t func = kernel_data.ptr cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), val_arr.get_data(), result.size, NULL) - with nogil: 
c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -340,7 +340,7 @@ cdef utils.dpnp_descriptor call_fptr_1out(DPNPFuncName fptr_name, # Call FPTR function cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), result.size, NULL) - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -388,7 +388,7 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out(DPNPFuncName fptr_name, cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL) - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -455,7 +455,7 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name, NULL, NULL) # dep_events_ref - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -519,7 +519,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out(DPNPFuncName fptr_name, NULL, NULL) # dep_events_ref) - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -596,7 +596,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, NULL, NULL) # dep_events_ref) - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx index 11912d2423cc..dd6545d4e528 100644 --- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx @@ -67,7 +67,9 @@ ctypedef 
c_dpctl.DPCTLSyclEventRef(*custom_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyc const size_t, const size_t, const c_dpctl.DPCTLEventVectorRef) -ctypedef void(*ftpr_custom_vander_1in_1out_t)(void * , void * , size_t, size_t, int) +ctypedef c_dpctl.DPCTLSyclEventRef(*ftpr_custom_vander_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef, + void * , void * , size_t, size_t, int, + const c_dpctl.DPCTLEventVectorRef) ctypedef c_dpctl.DPCTLSyclEventRef(*custom_arraycreation_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, void *, const size_t, @@ -82,11 +84,21 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_arraycreation_1in_1out_func_ptr_t)(c_ const shape_elem_type *, const size_t, const c_dpctl.DPCTLEventVectorRef) -ctypedef void(*custom_indexing_1out_func_ptr_t)(void * , const size_t , const size_t , const int) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void * , + const size_t , + const size_t , + const int, + const c_dpctl.DPCTLEventVectorRef) ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_eye_t)(c_dpctl.DPCTLSyclQueueRef, void *, int , const shape_elem_type * , const c_dpctl.DPCTLEventVectorRef) -ctypedef void(*fptr_dpnp_trace_t)(const void *, void * , const shape_elem_type * , const size_t) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_trace_t)(c_dpctl.DPCTLSyclQueueRef, + const void *, + void * , + const shape_elem_type * , + const size_t, + const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_copy(utils.dpnp_descriptor x1): @@ -141,7 +153,7 @@ cpdef utils.dpnp_descriptor dpnp_diag(utils.dpnp_descriptor v, int k): result.ndim, NULL) # dep_events_ref - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -171,7 +183,7 @@ cpdef utils.dpnp_descriptor dpnp_eye(N, M=None, k=0, dtype=None): cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), k, result_shape.data(), NULL) - with nogil: 
c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -208,7 +220,7 @@ cpdef utils.dpnp_descriptor dpnp_full(result_shape, value_in, result_dtype): cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array_fill.get_data(), result.get_data(), result.size, NULL) - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -244,7 +256,7 @@ cpdef utils.dpnp_descriptor dpnp_full_like(result_shape, value_in, result_dtype) # Call FPTR function cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array_fill.get_data(), result.get_data(), result.size, NULL) - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -294,7 +306,7 @@ cpdef utils.dpnp_descriptor dpnp_identity(n, result_dtype): cdef fptr_1out_t func = kernel_data.ptr cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), n, NULL) - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -464,7 +476,7 @@ cpdef dpnp_ptp(utils.dpnp_descriptor arr, axis=None): axis_size, NULL) # dep_events_ref - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -483,15 +495,35 @@ cpdef utils.dpnp_descriptor dpnp_trace(utils.dpnp_descriptor arr, offset=0, axis cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype) cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(dtype_) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRACE, param1_type, param2_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRACE_EXT, param1_type, param2_type) + + arr_obj = 
arr.get_array() # ceate result array with type given by FPTR data cdef shape_type_c result_shape = diagonal_shape[:-1] - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=arr_obj.sycl_device, + usm_type=arr_obj.usm_type, + sycl_queue=arr_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_dpnp_trace_t func = kernel_data.ptr - func(diagonal_arr.get_data(), result.get_data(), diagonal_shape.data(), diagonal_ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + diagonal_arr.get_data(), + result.get_data(), + diagonal_shape.data(), + diagonal_ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -505,14 +537,22 @@ cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=numpy.float): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRI, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRI_EXT, param1_type, param1_type) cdef shape_type_c shape_in = (N, M) cdef utils.dpnp_descriptor result = utils.create_output_descriptor(shape_in, kernel_data.return_type, None) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef custom_indexing_1out_func_ptr_t func = kernel_data.ptr - func(result.get_data(), N, M, k) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), N, M, k, NULL) + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -555,7 +595,7 
@@ cpdef utils.dpnp_descriptor dpnp_tril(utils.dpnp_descriptor m, int k): result.ndim, NULL) # dep_events_ref - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -599,7 +639,7 @@ cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k): result.ndim, NULL) # dep_events_ref - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -607,14 +647,35 @@ cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k): cpdef utils.dpnp_descriptor dpnp_vander(utils.dpnp_descriptor x1, int N, int increasing): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_VANDER, param1_type, DPNP_FT_NONE) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_VANDER_EXT, param1_type, DPNP_FT_NONE) + + x1_obj = x1.get_array() # ceate result array with type given by FPTR data cdef shape_type_c result_shape = (x1.size, N) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef ftpr_custom_vander_1in_1out_t func = kernel_data.ptr - func(x1.get_data(), result.get_data(), x1.size, N, increasing) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + result.get_data(), + x1.size, + N, + increasing, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result 
diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx b/dpnp/dpnp_algo/dpnp_algo_indexing.pyx index 83417a5485be..f920ad0f4007 100644 --- a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_indexing.pyx @@ -54,46 +54,109 @@ __all__ += [ "dpnp_triu_indices_from" ] -ctypedef void(*fptr_dpnp_choose_t)(void *, void * , void ** , size_t, size_t, size_t) -ctypedef void(*fptr_dpnp_diag_indices)(void * , size_t) -ctypedef void(*custom_indexing_2in_1out_func_ptr_t)(void *, const size_t, void * , void * , size_t) -ctypedef void(*custom_indexing_2in_1out_func_ptr_t_)(void * , const size_t, void * , const size_t, shape_elem_type * , - shape_elem_type *, const size_t) -ctypedef void(*custom_indexing_2in_func_ptr_t)(void *, void * , shape_elem_type * , const size_t) -ctypedef void(*custom_indexing_3in_func_ptr_t)(void * , void * , void * , const size_t, const size_t) -ctypedef void(*custom_indexing_3in_with_axis_func_ptr_t)(void * , void * , void * , const size_t, shape_elem_type * , - const size_t, const size_t, const size_t,) -ctypedef void(*custom_indexing_6in_func_ptr_t)(void *, void * , void * , const size_t, const size_t, const size_t) -ctypedef void(*fptr_dpnp_nonzero_t)(const void * , void * , const size_t, const shape_elem_type * , const size_t , - const size_t) - - -cpdef utils.dpnp_descriptor dpnp_choose(utils.dpnp_descriptor input, list choices1): +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_choose_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , void ** , size_t, size_t, size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_diag_indices)(c_dpctl.DPCTLSyclQueueRef, + void * , size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, + const size_t, + void * , + void * , + size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_1out_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef, 
+ void * , + const size_t, + void * , + const size_t, + shape_elem_type * , + shape_elem_type *, + const size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , shape_elem_type * , const size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_3in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void * , + void * , + void * , + const size_t, + const size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_3in_with_axis_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void * , + void * , + void * , + const size_t, + shape_elem_type * , + const size_t, + const size_t, + const size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_6in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, + void * , + void * , + const size_t, + const size_t, + const size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_nonzero_t)(c_dpctl.DPCTLSyclQueueRef, + const void * , + void * , + const size_t, + const shape_elem_type * , + const size_t , + const size_t, + const c_dpctl.DPCTLEventVectorRef) + + +cpdef utils.dpnp_descriptor dpnp_choose(utils.dpnp_descriptor x1, list choices1): cdef vector[void * ] choices cdef utils.dpnp_descriptor choice for desc in choices1: choice = desc choices.push_back(choice.get_data()) - cdef shape_type_c input_shape = input.shape + cdef shape_type_c x1_shape = x1.shape cdef size_t choice_size = choices1[0].size - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) + cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(choices1[0].dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOOSE, param1_type, param2_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOOSE_EXT, param1_type, 
param2_type) - cdef utils.dpnp_descriptor res_array = utils.create_output_descriptor(input_shape, kernel_data.return_type, None) + x1_obj = x1.get_array() + + cdef utils.dpnp_descriptor res_array = utils.create_output_descriptor(x1_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = res_array.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_dpnp_choose_t func = kernel_data.ptr - func(res_array.get_data(), - input.get_data(), - choices.data(), - input_shape[0], - choices.size(), - choice_size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + res_array.get_data(), + x1.get_data(), + choices.data(), + x1_shape[0], + choices.size(), + choice_size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return res_array @@ -103,73 +166,120 @@ cpdef tuple dpnp_diag_indices(n, ndim): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.int64) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAG_INDICES, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAG_INDICES_EXT, param1_type, param1_type) cdef fptr_dpnp_diag_indices func = kernel_data.ptr + cdef c_dpctl.SyclQueue q + cdef c_dpctl.DPCTLSyclQueueRef q_ref + cdef c_dpctl.DPCTLSyclEventRef event_ref + res_list = [] cdef utils.dpnp_descriptor res_arr cdef shape_type_c result_shape = utils._object_to_tuple(res_size) for i in range(ndim): res_arr = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) - func(res_arr.get_data(), res_size) + q = res_arr.get_array().sycl_queue + q_ref = q.get_queue_ref() + + event_ref = func(q_ref, res_arr.get_data(), res_size, NULL) + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) 
res_list.append(res_arr.get_pyobj()) return tuple(res_list) -cpdef utils.dpnp_descriptor dpnp_diagonal(dpnp_descriptor input, offset=0): - cdef shape_type_c input_shape = input.shape - n = min(input.shape[0], input.shape[1]) - res_shape = [None] * (input.ndim - 1) +cpdef utils.dpnp_descriptor dpnp_diagonal(dpnp_descriptor x1, offset=0): + cdef shape_type_c x1_shape = x1.shape + + n = min(x1.shape[0], x1.shape[1]) + res_shape = [None] * (x1.ndim - 1) - if input.ndim > 2: - for i in range(input.ndim - 2): - res_shape[i] = input.shape[i + 2] + if x1.ndim > 2: + for i in range(x1.ndim - 2): + res_shape[i] = x1.shape[i + 2] - if (n + offset) > input.shape[1]: - res_shape[-1] = input.shape[1] - offset - elif (n + offset) > input.shape[0]: - res_shape[-1] = input.shape[0] + if (n + offset) > x1.shape[1]: + res_shape[-1] = x1.shape[1] - offset + elif (n + offset) > x1.shape[0]: + res_shape[-1] = x1.shape[0] else: res_shape[-1] = n + offset cdef shape_type_c result_shape = res_shape res_ndim = len(res_shape) - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) + cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAGONAL, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAGONAL_EXT, param1_type, param1_type) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + x1_obj = x1.get_array() + + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_indexing_2in_1out_func_ptr_t_ func = kernel_data.ptr - func(input.get_data(), - input.size, - result.get_data(), 
- offset, - input_shape.data(), - result_shape.data(), - res_ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + x1.size, + result.get_data(), + offset, + x1_shape.data(), + result_shape.data(), + res_ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef dpnp_fill_diagonal(dpnp_descriptor input, val): - cdef shape_type_c input_shape = input.shape - cdef utils.dpnp_descriptor val_arr = utils_py.create_output_descriptor_py((1,), input.dtype, None) +cpdef dpnp_fill_diagonal(dpnp_descriptor x1, val): + x1_obj = x1.get_array() + + cdef shape_type_c x1_shape = x1.shape + cdef utils.dpnp_descriptor val_arr = utils_py.create_output_descriptor_py((1,), + x1.dtype, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + val_arr.get_pyobj()[0] = val - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) + cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FILL_DIAGONAL, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FILL_DIAGONAL_EXT, param1_type, param1_type) + + cdef c_dpctl.SyclQueue q = x1_obj.sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_indexing_2in_func_ptr_t func = kernel_data.ptr - func(input.get_data(), val_arr.get_data(), input_shape.data(), input.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + val_arr.get_data(), + x1_shape.data(), + x1.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) cpdef object dpnp_indices(dimensions): @@ -216,10 +326,14 @@ cpdef tuple dpnp_nonzero(utils.dpnp_descriptor in_array1): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype) - cdef DPNPFuncData kernel_data = 
get_dpnp_function_ptr(DPNP_FN_NONZERO, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_NONZERO_EXT, param1_type, param1_type) cdef fptr_dpnp_nonzero_t func = kernel_data.ptr + cdef c_dpctl.SyclQueue q + cdef c_dpctl.DPCTLSyclQueueRef q_ref + cdef c_dpctl.DPCTLSyclEventRef event_ref + array1_obj = in_array1.get_array() res_list = [] @@ -234,7 +348,20 @@ cpdef tuple dpnp_nonzero(utils.dpnp_descriptor in_array1): usm_type=array1_obj.usm_type, sycl_queue=array1_obj.sycl_queue) - func(in_array1.get_data(), res_arr.get_data(), res_arr.size, shape_arr.data(), in_array1.ndim, j) + q = res_arr.get_array().sycl_queue + q_ref = q.get_queue_ref() + + event_ref = func(q_ref, + in_array1.get_data(), + res_arr.get_data(), + res_arr.size, + shape_arr.data(), + in_array1.ndim, + j, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) res_list.append(res_arr.get_pyobj()) @@ -244,7 +371,14 @@ cpdef tuple dpnp_nonzero(utils.dpnp_descriptor in_array1): cpdef dpnp_place(dpnp_descriptor arr, object mask, dpnp_descriptor vals): - cdef utils.dpnp_descriptor mask_ = utils_py.create_output_descriptor_py((mask.size,), dpnp.int64, None) + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(arr, vals) + + cdef utils.dpnp_descriptor mask_ = utils_py.create_output_descriptor_py((mask.size,), + dpnp.int64, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(mask.size): if mask.item(i): mask_.get_pyobj()[i] = 1 @@ -252,21 +386,40 @@ cpdef dpnp_place(dpnp_descriptor arr, object mask, dpnp_descriptor vals): mask_.get_pyobj()[i] = 0 cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PLACE, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PLACE_EXT, param1_type, param1_type) 
+ + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_indexing_3in_func_ptr_t func = kernel_data.ptr - func(arr.get_data(), mask_.get_data(), vals.get_data(), arr.size, vals.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + arr.get_data(), + mask_.get_data(), + vals.get_data(), + arr.size, + vals.size, + NULL) # dep_events_ref + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) -cpdef dpnp_put(dpnp_descriptor input, object ind, v): + +cpdef dpnp_put(dpnp_descriptor x1, object ind, v): ind_is_list = isinstance(ind, list) + x1_obj = x1.get_array() + if dpnp.isscalar(ind): ind_size = 1 else: ind_size = len(ind) - cdef utils.dpnp_descriptor ind_array = utils_py.create_output_descriptor_py((ind_size,), dpnp.int64, None) + cdef utils.dpnp_descriptor ind_array = utils_py.create_output_descriptor_py((ind_size,), + dpnp.int64, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) if dpnp.isscalar(ind): ind_array.get_pyobj()[0] = ind else: @@ -277,32 +430,67 @@ cpdef dpnp_put(dpnp_descriptor input, object ind, v): v_size = 1 else: v_size = len(v) - cdef utils.dpnp_descriptor v_array = utils_py.create_output_descriptor_py((v_size,), input.dtype, None) + cdef utils.dpnp_descriptor v_array = utils_py.create_output_descriptor_py((v_size,), + x1.dtype, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) if dpnp.isscalar(v): v_array.get_pyobj()[0] = v else: for i in range(v_size): v_array.get_pyobj()[i] = v[i] - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) + cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PUT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PUT_EXT, param1_type, param1_type) + + cdef c_dpctl.SyclQueue q 
= x1_obj.sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_indexing_6in_func_ptr_t func = kernel_data.ptr - func(input.get_data(), ind_array.get_data(), v_array.get_data(), input.size, ind_array.size, v_array.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + ind_array.get_data(), + v_array.get_data(), + x1.size, + ind_array.size, + v_array.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) cpdef dpnp_put_along_axis(dpnp_descriptor arr, dpnp_descriptor indices, dpnp_descriptor values, int axis): cdef shape_type_c arr_shape = arr.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PUT_ALONG_AXIS, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PUT_ALONG_AXIS_EXT, param1_type, param1_type) + + utils.get_common_usm_allocation(arr, indices) # check USM allocation is common + _, _, result_sycl_queue = utils.get_common_usm_allocation(arr, values) + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_indexing_3in_with_axis_func_ptr_t func = kernel_data.ptr - func(arr.get_data(), indices.get_data(), values.get_data(), - axis, arr_shape.data(), arr.ndim, indices.size, values.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + arr.get_data(), + indices.get_data(), + values.get_data(), + axis, + arr_shape.data(), + arr.ndim, + indices.size, + values.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) cpdef dpnp_putmask(utils.dpnp_descriptor arr, utils.dpnp_descriptor mask, utils.dpnp_descriptor values): @@ -334,16 +522,37 @@ cpdef utils.dpnp_descriptor dpnp_select(list condlist, list choicelist, default) return res_array -cpdef utils.dpnp_descriptor 
dpnp_take(utils.dpnp_descriptor input, utils.dpnp_descriptor indices): - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) +cpdef utils.dpnp_descriptor dpnp_take(utils.dpnp_descriptor x1, utils.dpnp_descriptor indices): + cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) + + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TAKE_EXT, param1_type, param1_type) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TAKE, param1_type, param1_type) + x1_obj = x1.get_array() - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(indices.shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(indices.shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_indexing_2in_1out_func_ptr_t func = kernel_data.ptr - func(input.get_data(), input.size, indices.get_data(), result.get_data(), indices.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + x1.size, + indices.get_data(), + result.get_data(), + indices.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx index 33eb55e5dcef..c738cc75b705 100644 --- a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx @@ -44,14 +44,18 @@ __all__ += [ # C function pointer to the C library template functions -ctypedef void(*fptr_2in_1out_shapes_t)(void *, void * , void * , shape_elem_type * , - shape_elem_type *, shape_elem_type * , size_t) -ctypedef void(*fptr_2in_1out_dot_t)(void * , const size_t, const size_t, - 
const shape_elem_type *, const shape_elem_type * , - void * , const size_t, const size_t, - const shape_elem_type *, const shape_elem_type * , - void * , const size_t, const size_t, - const shape_elem_type *, const shape_elem_type * ) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_shapes_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , void * , shape_elem_type * , + shape_elem_type *, shape_elem_type * , size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_dot_t)(c_dpctl.DPCTLSyclQueueRef, + void * , const size_t, const size_t, + const shape_elem_type *, const shape_elem_type * , + void * , const size_t, const size_t, + const shape_elem_type *, const shape_elem_type * , + void * , const size_t, const size_t, + const shape_elem_type *, const shape_elem_type * , + const c_dpctl.DPCTLEventVectorRef) ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_matmul_t)(c_dpctl.DPCTLSyclQueueRef, void * , const size_t, const size_t, const shape_elem_type *, const shape_elem_type * , @@ -73,7 +77,7 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(in_array2.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DOT, param1_type, param2_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DOT_EXT, param1_type, param2_type) ndim1 = in_array1.ndim ndim2 = in_array2.ndim @@ -95,8 +99,15 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp shape2 = (shape1[0], 1) result_shape = shape1[:-1] + shape2[:-2] + shape2[-1:] + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(in_array1, in_array2) + # create result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = 
utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result.shape) cdef shape_type_c in_array1_shape = in_array1.shape @@ -104,23 +115,31 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp cdef shape_type_c in_array2_shape = in_array2.shape cdef shape_type_c in_array2_strides = utils.strides_to_vector(in_array2.strides, in_array2.shape) + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef fptr_2in_1out_dot_t func = kernel_data.ptr # call FPTR function - func(result.get_data(), - result.size, - result.ndim, - result_shape.data(), - result_strides.data(), - in_array1.get_data(), - in_array1.size, - in_array1.ndim, - in_array1_shape.data(), - in_array1_strides.data(), - in_array2.get_data(), - in_array2.size, - in_array2.ndim, - in_array2_shape.data(), - in_array2_strides.data()) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + result.size, + result.ndim, + result_shape.data(), + result_strides.data(), + in_array1.get_data(), + in_array1.size, + in_array1.ndim, + in_array1_shape.data(), + in_array1_strides.data(), + in_array2.get_data(), + in_array2.size, + in_array2.ndim, + in_array2_shape.data(), + in_array2_strides.data(), + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -136,8 +155,15 @@ cpdef utils.dpnp_descriptor dpnp_inner(dpnp_descriptor array1, dpnp_descriptor a cdef shape_type_c result_shape = array1_no_last_axes result_shape.insert(result_shape.end(), array2_no_last_axes.begin(), array2_no_last_axes.end()) + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(array1, array2) + # ceate result array with type given by FPTR 
data - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, result_type, None) + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, + result_type, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) # calculate input arrays offsets cdef shape_type_c array1_offsets = [1] * len(array1.shape) @@ -218,15 +244,35 @@ cpdef utils.dpnp_descriptor dpnp_kron(dpnp_descriptor in_array1, dpnp_descriptor cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(in_array2.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_KRON, param1_type, param2_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_KRON_EXT, param1_type, param2_type) + + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(in_array1, in_array2) # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_2in_1out_shapes_t func = kernel_data.ptr # call FPTR function - func(in_array1.get_data(), in_array2.get_data(), result.get_data(), - in_array1_shape.data(), in_array2_shape.data(), result_shape.data(), ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + in_array1.get_data(), + in_array2.get_data(), + result.get_data(), + in_array1_shape.data(), + in_array2_shape.data(), + result_shape.data(), + ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -324,7 +370,14 @@ cpdef 
utils.dpnp_descriptor dpnp_outer(utils.dpnp_descriptor array1, utils.dpnp_ cdef shape_type_c result_shape = (array1.size, array2.size) result_type = numpy.promote_types(array1.dtype, array1.dtype) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, result_type, None) + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(array1, array2) + + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, + result_type, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) result_flatiter = result.get_pyobj().flat array1_flatiter = array1.get_pyobj().flat diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pyx index c2bc4dfcc955..e0b928ddf025 100644 --- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_logic.pyx @@ -55,20 +55,44 @@ __all__ += [ ] -ctypedef void(*custom_logic_1in_1out_func_ptr_t)(void *, void * , const size_t) -ctypedef void(*custom_allclose_1in_1out_func_ptr_t)(void * , void * , void * , const size_t, double, double) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_logic_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , const size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_allclose_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void * , + void * , + void * , + const size_t, + double, + double, + const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_all(utils.dpnp_descriptor array1): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,), dpnp.bool, None) + array1_obj = array1.get_array() + + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,), + dpnp.bool, + None, + device=array1_obj.sycl_device, + usm_type=array1_obj.usm_type, + sycl_queue=array1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = 
result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ALL, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ALL_EXT, param1_type, param1_type) cdef custom_logic_1in_1out_func_ptr_t func = kernel_data.ptr - func(array1.get_data(), result.get_data(), array1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array1.get_data(), result.get_data(), array1.size, NULL) + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -77,48 +101,93 @@ cpdef utils.dpnp_descriptor dpnp_allclose(utils.dpnp_descriptor array1, utils.dpnp_descriptor array2, double rtol_val, double atol_val): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,), dpnp.bool, None) + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(array1, array2) + + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,), + dpnp.bool, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(array2.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ALLCLOSE, param1_type, param2_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ALLCLOSE_EXT, param1_type, param2_type) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_allclose_1in_1out_func_ptr_t func = kernel_data.ptr - func(array1.get_data(), array2.get_data(), result.get_data(), array1.size, rtol_val, atol_val) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + array1.get_data(), + 
array2.get_data(), + result.get_data(), + array1.size, + rtol_val, + atol_val, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result cpdef utils.dpnp_descriptor dpnp_any(utils.dpnp_descriptor array1): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,), dpnp.bool, None) + array1_obj = array1.get_array() + + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,), + dpnp.bool, + None, + device=array1_obj.sycl_device, + usm_type=array1_obj.usm_type, + sycl_queue=array1_obj.sycl_queue) cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ANY, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ANY_EXT, param1_type, param1_type) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_logic_1in_1out_func_ptr_t func = kernel_data.ptr - func(array1.get_data(), result.get_data(), array1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array1.get_data(), result.get_data(), array1.size, NULL) + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor array1, utils.dpnp_descriptor input2): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(array1.shape, +cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, - None) + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) 
for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(array1.get_pyobj()[i] == input2.get_pyobj()[i]) + result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] == input2.get_pyobj()[i]) return result cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, - None) + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(result.size): result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] > input2.get_pyobj()[i]) @@ -126,9 +195,13 @@ cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor input1, utils.dpn cpdef utils.dpnp_descriptor dpnp_greater_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, - None) + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(result.size): result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] >= input2.get_pyobj()[i]) @@ -140,7 +213,13 @@ cpdef utils.dpnp_descriptor dpnp_isclose(utils.dpnp_descriptor input1, double rtol=1e-05, double atol=1e-08, cpp_bool equal_nan=False): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None) + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, + dpnp.bool, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(result.size): result.get_pyobj()[i] = 
numpy.isclose(input1.get_pyobj()[i], input2.get_pyobj()[i], rtol, atol, equal_nan) @@ -149,7 +228,13 @@ cpdef utils.dpnp_descriptor dpnp_isclose(utils.dpnp_descriptor input1, cpdef utils.dpnp_descriptor dpnp_isfinite(utils.dpnp_descriptor input1): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None) + input1_obj = input1.get_array() + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, + dpnp.bool, + None, + device=input1_obj.sycl_device, + usm_type=input1_obj.usm_type, + sycl_queue=input1_obj.sycl_queue) for i in range(result.size): result.get_pyobj()[i] = numpy.isfinite(input1.get_pyobj()[i]) @@ -158,7 +243,13 @@ cpdef utils.dpnp_descriptor dpnp_isfinite(utils.dpnp_descriptor input1): cpdef utils.dpnp_descriptor dpnp_isinf(utils.dpnp_descriptor input1): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None) + input1_obj = input1.get_array() + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, + dpnp.bool, + None, + device=input1_obj.sycl_device, + usm_type=input1_obj.usm_type, + sycl_queue=input1_obj.sycl_queue) for i in range(result.size): result.get_pyobj()[i] = numpy.isinf(input1.get_pyobj()[i]) @@ -167,7 +258,13 @@ cpdef utils.dpnp_descriptor dpnp_isinf(utils.dpnp_descriptor input1): cpdef utils.dpnp_descriptor dpnp_isnan(utils.dpnp_descriptor input1): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None) + input1_obj = input1.get_array() + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, + dpnp.bool, + None, + device=input1_obj.sycl_device, + usm_type=input1_obj.usm_type, + sycl_queue=input1_obj.sycl_queue) for i in range(result.size): result.get_pyobj()[i] = numpy.isnan(input1.get_pyobj()[i]) @@ -176,9 +273,13 @@ cpdef utils.dpnp_descriptor dpnp_isnan(utils.dpnp_descriptor input1): cpdef 
utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, - None) + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(result.size): result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] < input2.get_pyobj()[i]) @@ -186,9 +287,13 @@ cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor input1, utils.dpnp_d cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, - None) + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(result.size): result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] <= input2.get_pyobj()[i]) @@ -196,7 +301,13 @@ cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor input1, utils. 
cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None) + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, + dpnp.bool, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(result.size): result.get_pyobj()[i] = numpy.logical_and(input1.get_pyobj()[i], input2.get_pyobj()[i]) @@ -205,7 +316,13 @@ cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor input1): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None) + input1_obj = input1.get_array() + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, + dpnp.bool, + None, + device=input1_obj.sycl_device, + usm_type=input1_obj.usm_type, + sycl_queue=input1_obj.sycl_queue) for i in range(result.size): result.get_pyobj()[i] = numpy.logical_not(input1.get_pyobj()[i]) @@ -214,7 +331,13 @@ cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor input1): cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None) + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, + dpnp.bool, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(result.size): result.get_pyobj()[i] = numpy.logical_or(input1.get_pyobj()[i], input2.get_pyobj()[i]) @@ -223,7 
+346,13 @@ cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor input1, utils. cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None) + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, + dpnp.bool, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(result.size): result.get_pyobj()[i] = numpy.logical_xor(input1.get_pyobj()[i], input2.get_pyobj()[i]) @@ -232,9 +361,13 @@ cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, - None) + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) for i in range(result.size): result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] != input2.get_pyobj()[i]) diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx index f035761f12a2..b2b0c4f9068e 100644 --- a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx @@ -47,9 +47,18 @@ __all__ += [ # C function pointer to the C library template functions -ctypedef void(*fptr_custom_elemwise_transpose_1in_1out_t)(void * , shape_elem_type * , shape_elem_type * , - shape_elem_type * , size_t, void * , size_t) -ctypedef void(*fptr_dpnp_repeat_t)(const void *, void * , const size_t , const size_t) +ctypedef 
c_dpctl.DPCTLSyclEventRef(*fptr_custom_elemwise_transpose_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef, + void * , + shape_elem_type * , + shape_elem_type * , + shape_elem_type * , + size_t, + void * , + size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_repeat_t)(c_dpctl.DPCTLSyclQueueRef, + const void *, void * , const size_t , const size_t, + const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_atleast_2d(utils.dpnp_descriptor arr): @@ -58,7 +67,13 @@ cpdef utils.dpnp_descriptor dpnp_atleast_2d(utils.dpnp_descriptor arr): cdef size_t arr_ndim = arr.ndim cdef long arr_size = arr.size if arr_ndim == 1: - result = utils_py.create_output_descriptor_py((1, arr_size), arr.dtype, None) + arr_obj = arr.get_array() + result = utils_py.create_output_descriptor_py((1, arr_size), + arr.dtype, + None, + device=arr_obj.sycl_device, + usm_type=arr_obj.usm_type, + sycl_queue=arr_obj.sycl_queue) for i in range(arr_size): result.get_pyobj()[0, i] = arr.get_pyobj()[i] return result @@ -72,13 +87,26 @@ cpdef utils.dpnp_descriptor dpnp_atleast_3d(utils.dpnp_descriptor arr): cdef size_t arr_ndim = arr.ndim cdef shape_type_c arr_shape = arr.shape cdef long arr_size = arr.size + + arr_obj = arr.get_array() + if arr_ndim == 1: - result = utils_py.create_output_descriptor_py((1, 1, arr_size), arr.dtype, None) + result = utils_py.create_output_descriptor_py((1, 1, arr_size), + arr.dtype, + None, + device=arr_obj.sycl_device, + usm_type=arr_obj.usm_type, + sycl_queue=arr_obj.sycl_queue) for i in range(arr_size): result.get_pyobj()[0, 0, i] = arr.get_pyobj()[i] return result elif arr_ndim == 2: - result = utils_py.create_output_descriptor_py((1, arr_shape[0], arr_shape[1]), arr.dtype, None) + result = utils_py.create_output_descriptor_py((1, arr_shape[0], arr_shape[1]), + arr.dtype, + None, + device=arr_obj.sycl_device, + usm_type=arr_obj.usm_type, + sycl_queue=arr_obj.sycl_queue) for i in range(arr_shape[0]): for j in range(arr_shape[1]): 
result.get_pyobj()[0, i, j] = arr.get_pyobj()[i, j] @@ -122,7 +150,7 @@ cpdef dpnp_copyto(utils.dpnp_descriptor dst, utils.dpnp_descriptor src, where=Tr NULL, NULL) # dep_events_ref - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) @@ -152,22 +180,39 @@ cpdef utils.dpnp_descriptor dpnp_expand_dims(utils.dpnp_descriptor in_array, axi shape_list.push_back(in_array.shape[axis_idx]) axis_idx = axis_idx + 1 - cdef utils.dpnp_descriptor result = dpnp.get_dpnp_descriptor(dpnp.reshape(dpnp_copy(in_array).get_pyobj(), (shape_list))) - - return result + return dpnp_reshape(in_array, shape_list) cpdef utils.dpnp_descriptor dpnp_repeat(utils.dpnp_descriptor array1, repeats, axes=None): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_REPEAT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_REPEAT_EXT, param1_type, param1_type) + + array1_obj = array1.get_array() # ceate result array with type given by FPTR data cdef shape_type_c result_shape = (array1.size * repeats, ) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=array1_obj.sycl_device, + usm_type=array1_obj.usm_type, + sycl_queue=array1_obj.sycl_queue) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_dpnp_repeat_t func = kernel_data.ptr - func(array1.get_data(), result.get_data(), repeats, array1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + array1.get_data(), + result.get_data(), + repeats, + array1.size, + NULL) # dep_events_ref + + with nogil: 
c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -175,8 +220,14 @@ cpdef utils.dpnp_descriptor dpnp_repeat(utils.dpnp_descriptor array1, repeats, a cpdef utils.dpnp_descriptor dpnp_reshape(utils.dpnp_descriptor array1, newshape, order="C"): # return dpnp.get_dpnp_descriptor(dpctl.tensor.usm_ndarray(newshape, dtype=numpy.dtype(array1.dtype).name, buffer=array1.get_pyobj())) # return dpnp.get_dpnp_descriptor(dpctl.tensor.reshape(array1.get_pyobj(), newshape)) - array_obj = dpctl.tensor.reshape(array1.get_array(), newshape, order=order) - return dpnp.get_dpnp_descriptor(dpnp_array(array_obj.shape, buffer=array_obj, order=order)) + array1_obj = array1.get_array() + array_obj = dpctl.tensor.reshape(array1_obj, newshape, order=order) + return dpnp.get_dpnp_descriptor(dpnp_array(array_obj.shape, + buffer=array_obj, + order=order, + device=array1_obj.sycl_device, + usm_type=array1_obj.usm_type, + sycl_queue=array1_obj.sycl_queue)) cpdef utils.dpnp_descriptor dpnp_transpose(utils.dpnp_descriptor array1, axes=None): @@ -209,15 +260,36 @@ cpdef utils.dpnp_descriptor dpnp_transpose(utils.dpnp_descriptor array1, axes=No cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRANSPOSE, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRANSPOSE_EXT, param1_type, param1_type) + + array1_obj = array1.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=array1_obj.sycl_device, + usm_type=array1_obj.usm_type, + sycl_queue=array1_obj.sycl_queue) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = 
result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_custom_elemwise_transpose_1in_1out_t func = kernel_data.ptr # call FPTR function - func(array1.get_data(), input_shape.data(), result_shape.data(), - permute_axes.data(), input_shape_size, result.get_data(), array1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + array1.get_data(), + input_shape.data(), + result_shape.data(), + permute_axes.data(), + input_shape_size, + result.get_data(), + array1.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -237,6 +309,6 @@ cpdef utils.dpnp_descriptor dpnp_squeeze(utils.dpnp_descriptor in_array, axis): else: shape_list.push_back(in_array.shape[i]) - cdef utils.dpnp_descriptor result = dpnp.get_dpnp_descriptor(dpnp.reshape(dpnp_copy(in_array).get_pyobj(), (shape_list))) + in_array_obj = in_array.get_array() - return result + return dpnp_reshape(dpnp_copy(in_array), shape_list) diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx index b2c070d95b47..142163054bcd 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx @@ -74,28 +74,51 @@ __all__ += [ ] -ctypedef void(*fptr_custom_elemwise_absolute_1in_1out_t)(void * , void * , size_t) -ctypedef void(*fptr_1in_2out_t)(void * , void * , void * , size_t) -ctypedef void(*ftpr_custom_trapz_2in_1out_with_2size_t)(void *, void * , void * , double, size_t, size_t) -ctypedef void(*ftpr_custom_around_1in_1out_t)(const void * , void * , const size_t, const int) - - -cpdef utils.dpnp_descriptor dpnp_absolute(utils.dpnp_descriptor input): - cdef shape_type_c input_shape = input.shape - cdef size_t input_shape_size = input.ndim +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_elemwise_absolute_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef, + void * , void * , size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef 
c_dpctl.DPCTLSyclEventRef(*fptr_1in_2out_t)(c_dpctl.DPCTLSyclQueueRef, + void * , void * , void * , size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*ftpr_custom_trapz_2in_1out_with_2size_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , void * , double, size_t, size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*ftpr_custom_around_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef, + const void * , void * , const size_t, const int, + const c_dpctl.DPCTLEventVectorRef) + + +cpdef utils.dpnp_descriptor dpnp_absolute(utils.dpnp_descriptor x1): + cdef shape_type_c x1_shape = x1.shape + cdef size_t x1_shape_size = x1.ndim # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) + cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ABSOLUTE, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ABSOLUTE_EXT, param1_type, param1_type) + + x1_obj = x1.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(x1_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_custom_elemwise_absolute_1in_1out_t func = kernel_data.ptr # call FPTR function - func(input.get_data(), result.get_data(), input.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL) + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + 
c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -120,15 +143,30 @@ cpdef utils.dpnp_descriptor dpnp_around(utils.dpnp_descriptor x1, int decimals): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_AROUND, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_AROUND_EXT, param1_type, param1_type) + + x1_obj = x1.get_array() # ceate result array with type given by FPTR data cdef shape_type_c result_shape = x1.shape - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef ftpr_custom_around_1in_1out_t func = kernel_data.ptr - func(x1.get_data(), result.get_data(), x1.size, decimals) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, decimals, NULL) + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -184,11 +222,23 @@ cpdef utils.dpnp_descriptor dpnp_cumsum(utils.dpnp_descriptor x1): cpdef utils.dpnp_descriptor dpnp_diff(utils.dpnp_descriptor x1, int n): cdef utils.dpnp_descriptor res + x1_obj = x1.get_array() + if x1.size - n < 1: - res = utils.dpnp_descriptor(dpnp.empty(0, dtype=x1.dtype)) + res_obj = dpnp_container.empty(0, + dtype=x1.dtype, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + res = utils.dpnp_descriptor(res_obj) return res - res = utils.dpnp_descriptor(dpnp.empty(x1.size - 1, dtype=x1.dtype)) + res_obj = dpnp_container.empty(x1.size - 1, + 
dtype=x1.dtype, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + res = utils.dpnp_descriptor(res_obj) for i in range(res.size): res.get_pyobj()[i] = x1.get_pyobj()[i + 1] - x1.get_pyobj()[i] @@ -256,7 +306,7 @@ cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1): NULL, NULL) # dep_events_ref - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -290,9 +340,16 @@ cpdef utils.dpnp_descriptor dpnp_gradient(utils.dpnp_descriptor y1, int dx=1): cdef size_t size = y1.size + y1_obj = y1.get_array() + # ceate result array with type given by FPTR data cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, dpnp.float64, None) + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, + dpnp.float64, + None, + device=y1_obj.sycl_device, + usm_type=y1_obj.usm_type, + sycl_queue=y1_obj.sycl_queue) cdef double cur = (y1.get_pyobj()[1] - y1.get_pyobj()[0]) / dx @@ -338,16 +395,41 @@ cpdef tuple dpnp_modf(utils.dpnp_descriptor x1): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) """ get the FPTR data structure """ - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MODF, param1_type, DPNP_FT_NONE) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MODF_EXT, param1_type, DPNP_FT_NONE) + + x1_obj = x1.get_array() # ceate result array with type given by FPTR data cdef shape_type_c result_shape = x1.shape - cdef utils.dpnp_descriptor result1 = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) - cdef utils.dpnp_descriptor result2 = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result1 = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + 
device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor result2 = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + _, _, result_sycl_queue = utils.get_common_usm_allocation(result1, result2) + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_1in_2out_t func = kernel_data.ptr """ Call FPTR function """ - func(x1.get_data(), result1.get_data(), result2.get_data(), x1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + result1.get_data(), + result2.get_data(), + x1.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return (result1.get_pyobj(), result2.get_pyobj()) @@ -387,7 +469,13 @@ cpdef utils.dpnp_descriptor dpnp_nancumsum(utils.dpnp_descriptor x1): cpdef utils.dpnp_descriptor dpnp_nanprod(utils.dpnp_descriptor x1): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(x1.shape, x1.dtype, None) + x1_obj = x1.get_array() + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(x1.shape, + x1.dtype, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) for i in range(result.size): input_elem = x1.get_pyobj().flat[i] @@ -401,7 +489,13 @@ cpdef utils.dpnp_descriptor dpnp_nanprod(utils.dpnp_descriptor x1): cpdef utils.dpnp_descriptor dpnp_nansum(utils.dpnp_descriptor x1): - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(x1.shape, x1.dtype, None) + x1_obj = x1.get_array() + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(x1.shape, + x1.dtype, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) for i in range(result.size): input_elem = 
x1.get_pyobj().flat[i] @@ -426,7 +520,7 @@ cpdef utils.dpnp_descriptor dpnp_power(utils.dpnp_descriptor x1_obj, return call_fptr_2in_1out_strides(DPNP_FN_POWER_EXT, x1_obj, x2_obj, dtype, out, where, func_name="power") -cpdef utils.dpnp_descriptor dpnp_prod(utils.dpnp_descriptor input, +cpdef utils.dpnp_descriptor dpnp_prod(utils.dpnp_descriptor x1, object axis=None, object dtype=None, utils.dpnp_descriptor out=None, @@ -443,24 +537,47 @@ cpdef utils.dpnp_descriptor dpnp_prod(utils.dpnp_descriptor input, input:complex128: outout:complex128: name:prod """ - cdef shape_type_c input_shape = input.shape - cdef DPNPFuncType input_c_type = dpnp_dtype_to_DPNPFuncType(input.dtype) + cdef shape_type_c x1_shape = x1.shape + cdef DPNPFuncType x1_c_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) cdef shape_type_c axis_shape = utils._object_to_tuple(axis) - cdef shape_type_c result_shape = utils.get_reduction_output_shape(input_shape, axis, keepdims) - cdef DPNPFuncType result_c_type = utils.get_output_c_type(DPNP_FN_PROD, input_c_type, out, dtype) + cdef shape_type_c result_shape = utils.get_reduction_output_shape(x1_shape, axis, keepdims) + cdef DPNPFuncType result_c_type = utils.get_output_c_type(DPNP_FN_PROD_EXT, x1_c_type, out, dtype) """ select kernel """ - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PROD, input_c_type, result_c_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PROD_EXT, x1_c_type, result_c_type) + + x1_obj = x1.get_array() """ Create result array """ - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, result_c_type, out) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + result_c_type, + out, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) cdef dpnp_reduction_c_t func = kernel_data.ptr + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef 
q_ref = q.get_queue_ref() + """ Call FPTR interface function """ - func(result.get_data(), input.get_data(), input_shape.data(), - input_shape.size(), axis_shape.data(), axis_shape.size(), NULL, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + x1.get_data(), + x1_shape.data(), + x1_shape.size(), + axis_shape.data(), + axis_shape.size(), + NULL, + NULL, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -485,7 +602,7 @@ cpdef utils.dpnp_descriptor dpnp_subtract(utils.dpnp_descriptor x1_obj, return call_fptr_2in_1out_strides(DPNP_FN_SUBTRACT_EXT, x1_obj, x2_obj, dtype, out, where) -cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor input, +cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor x1, object axis=None, object dtype=None, utils.dpnp_descriptor out=None, @@ -493,24 +610,47 @@ cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor input, object initial=None, object where=True): - cdef shape_type_c input_shape = input.shape - cdef DPNPFuncType input_c_type = dpnp_dtype_to_DPNPFuncType(input.dtype) + cdef shape_type_c x1_shape = x1.shape + cdef DPNPFuncType x1_c_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) cdef shape_type_c axis_shape = utils._object_to_tuple(axis) - cdef shape_type_c result_shape = utils.get_reduction_output_shape(input_shape, axis, keepdims) - cdef DPNPFuncType result_c_type = utils.get_output_c_type(DPNP_FN_SUM, input_c_type, out, dtype) + cdef shape_type_c result_shape = utils.get_reduction_output_shape(x1_shape, axis, keepdims) + cdef DPNPFuncType result_c_type = utils.get_output_c_type(DPNP_FN_SUM_EXT, x1_c_type, out, dtype) """ select kernel """ - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SUM, input_c_type, result_c_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SUM_EXT, x1_c_type, result_c_type) + + x1_obj = x1.get_array() """ Create result array """ - 
cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, result_c_type, out) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + result_c_type, + out, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() """ Call FPTR interface function """ cdef dpnp_reduction_c_t func = kernel_data.ptr - func(result.get_data(), input.get_data(), input_shape.data(), - input_shape.size(), axis_shape.data(), axis_shape.size(), NULL, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + result.get_data(), + x1.get_data(), + x1_shape.data(), + x1_shape.size(), + axis_shape.data(), + axis_shape.size(), + NULL, + NULL, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -519,14 +659,36 @@ cpdef utils.dpnp_descriptor dpnp_trapz(utils.dpnp_descriptor y1, utils.dpnp_desc cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(y1.dtype) cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRAPZ, param1_type, param2_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRAPZ_EXT, param1_type, param2_type) + + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(y1, x1) # ceate result array with type given by FPTR data cdef shape_type_c result_shape = (1,) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) + + result_sycl_queue = 
result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef ftpr_custom_trapz_2in_1out_with_2size_t func = kernel_data.ptr - func(y1.get_data(), x1.get_data(), result.get_data(), dx, y1.size, x1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + y1.get_data(), + x1.get_data(), + result.get_data(), + dx, + y1.size, + x1.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/dpnp_algo/dpnp_algo_searching.pyx b/dpnp/dpnp_algo/dpnp_algo_searching.pyx index 249d1453bea3..59ce8475181a 100644 --- a/dpnp/dpnp_algo/dpnp_algo_searching.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_searching.pyx @@ -41,22 +41,43 @@ __all__ += [ # C function pointer to the C library template functions -ctypedef void(*custom_search_1in_1out_func_ptr_t)(void * , void * , size_t) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_search_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void * , void * , size_t, + const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_argmax(utils.dpnp_descriptor in_array1): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype) cdef DPNPFuncType output_type = dpnp_dtype_to_DPNPFuncType(dpnp.int64) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARGMAX, param1_type, output_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARGMAX_EXT, param1_type, output_type) + + in_array1_obj = in_array1.get_array() # ceate result array with type given by FPTR data cdef shape_type_c result_shape = (1,) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=in_array1_obj.sycl_device, + usm_type=in_array1_obj.usm_type, + 
sycl_queue=in_array1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_search_1in_1out_func_ptr_t func = kernel_data.ptr - func(in_array1.get_data(), result.get_data(), in_array1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + in_array1.get_data(), + result.get_data(), + in_array1.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -65,14 +86,33 @@ cpdef utils.dpnp_descriptor dpnp_argmin(utils.dpnp_descriptor in_array1): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype) cdef DPNPFuncType output_type = dpnp_dtype_to_DPNPFuncType(dpnp.int64) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARGMIN, param1_type, output_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARGMIN_EXT, param1_type, output_type) + + in_array1_obj = in_array1.get_array() # ceate result array with type given by FPTR data cdef shape_type_c result_shape = (1,) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=in_array1_obj.sycl_device, + usm_type=in_array1_obj.usm_type, + sycl_queue=in_array1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_search_1in_1out_func_ptr_t func = kernel_data.ptr - func(in_array1.get_data(), result.get_data(), in_array1.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + in_array1.get_data(), + result.get_data(), + in_array1.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + 
c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx b/dpnp/dpnp_algo/dpnp_algo_sorting.pyx index 93a76e81a18b..9a701dd7c905 100644 --- a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_sorting.pyx @@ -42,8 +42,22 @@ __all__ += [ ] -ctypedef void(*fptr_dpnp_partition_t)(void * , void * , void * , const size_t , const shape_elem_type * , const size_t) -ctypedef void(*fptr_dpnp_searchsorted_t)(void * , const void * , const void * , bool , const size_t , const size_t ) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_partition_t)(c_dpctl.DPCTLSyclQueueRef, + void * , + void * , + void * , + const size_t, + const shape_elem_type * , + const size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_searchsorted_t)(c_dpctl.DPCTLSyclQueueRef, + void * , + const void * , + const void * , + bool, + const size_t, + const size_t, + const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_argsort(utils.dpnp_descriptor x1): @@ -59,15 +73,37 @@ cpdef utils.dpnp_descriptor dpnp_partition(utils.dpnp_descriptor arr, int kth, a cdef size_t kth_ = kth if kth >= 0 else (arr.ndim + kth) cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PARTITION, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PARTITION_EXT, param1_type, param1_type) cdef utils.dpnp_descriptor arr2 = dpnp_copy(arr) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(arr.shape, kernel_data.return_type, None) + arr_obj = arr.get_array() + + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(arr.shape, + kernel_data.return_type, + None, + device=arr_obj.sycl_device, + usm_type=arr_obj.usm_type, + sycl_queue=arr_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef 
c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_dpnp_partition_t func = kernel_data.ptr - func(arr.get_data(), arr2.get_data(), result.get_data(), kth_, shape1.data(), arr.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + arr.get_data(), + arr2.get_data(), + result.get_data(), + kth_, + shape1.data(), + arr.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -80,13 +116,35 @@ cpdef utils.dpnp_descriptor dpnp_searchsorted(utils.dpnp_descriptor arr, utils.d cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SEARCHSORTED, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SEARCHSORTED_EXT, param1_type, param1_type) + + arr_obj = arr.get_array() + + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(v.shape, + dpnp.int64, + None, + device=arr_obj.sycl_device, + usm_type=arr_obj.usm_type, + sycl_queue=arr_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(v.shape, dpnp.int64, None) + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_dpnp_searchsorted_t func = kernel_data.ptr - func(arr.get_data(), v.get_data(), result.get_data(), side_, arr.size, v.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + arr.get_data(), + v.get_data(), + result.get_data(), + side_, + arr.size, + v.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx index 5ff4c785d96f..872209df371f 100644 --- a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx 
@@ -49,16 +49,31 @@ __all__ += [ # C function pointer to the C library template functions -ctypedef void(*fptr_custom_cov_1in_1out_t)(void *, void * , size_t, size_t) -ctypedef void(*fptr_custom_nanvar_t)(void *, void * , void * , size_t, size_t) -ctypedef void(*fptr_custom_std_var_1in_1out_t)(void *, void * , shape_elem_type * , size_t, - shape_elem_type * , size_t, size_t) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_cov_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , size_t, size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_nanvar_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , void * , size_t, size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_std_var_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , shape_elem_type * , size_t, + shape_elem_type * , size_t, size_t, + const c_dpctl.DPCTLEventVectorRef) # C function pointer to the C library template functions -ctypedef void(*custom_statistic_1in_1out_func_ptr_t)(void *, void * , shape_elem_type * , size_t, - shape_elem_type * , size_t) -ctypedef void(*custom_statistic_1in_1out_func_ptr_t_max)(void *, void * , const size_t, shape_elem_type * , size_t, - shape_elem_type * , size_t) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_statistic_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , shape_elem_type * , size_t, + shape_elem_type * , size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_statistic_1in_1out_func_ptr_t_max)(c_dpctl.DPCTLSyclQueueRef, + void *, + void * , + const size_t, + shape_elem_type * , + size_t, + shape_elem_type * , + size_t, + const c_dpctl.DPCTLEventVectorRef) cdef utils.dpnp_descriptor call_fptr_custom_std_var_1in_1out(DPNPFuncName fptr_name, utils.dpnp_descriptor x1, ddof): @@ -70,9 +85,21 @@ cdef utils.dpnp_descriptor call_fptr_custom_std_var_1in_1out(DPNPFuncName fptr_n """ get the FPTR data structure """ cdef DPNPFuncData 
kernel_data = get_dpnp_function_ptr(fptr_name, param_type, DPNP_FT_NONE) + x1_obj = x1.get_array() + # ceate result array with type given by FPTR data cdef shape_type_c result_shape = (1,) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_custom_std_var_1in_1out_t func = kernel_data.ptr @@ -81,8 +108,19 @@ cdef utils.dpnp_descriptor call_fptr_custom_std_var_1in_1out(DPNPFuncName fptr_n cdef Py_ssize_t axis_size = 0 """ Call FPTR function """ - func(x1.get_data(), result.get_data(), x1_shape.data(), - x1.ndim, axis.data(), axis_size, ddof) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + result.get_data(), + x1_shape.data(), + x1.ndim, + axis.data(), + axis_size, + ddof, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) + return result @@ -134,7 +172,7 @@ cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_ NULL, NULL) # dep_events_ref - with nogil: c_dpctl.DPCTLEvent_Wait(event_ref) + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -151,27 +189,59 @@ cpdef utils.dpnp_descriptor dpnp_cov(utils.dpnp_descriptor array1): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COV, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COV_EXT, param1_type, param1_type) + + array1_obj = array1.get_array() # 
ceate result array with type given by FPTR data cdef shape_type_c result_shape = (input_shape[0], input_shape[0]) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=array1_obj.sycl_device, + usm_type=array1_obj.usm_type, + sycl_queue=array1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_custom_cov_1in_1out_t func = kernel_data.ptr # call FPTR function - func(array1.get_data(), result.get_data(), input_shape[0], input_shape[1]) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + array1.get_data(), + result.get_data(), + input_shape[0], + input_shape[1], + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result -cdef utils.dpnp_descriptor _dpnp_max(utils.dpnp_descriptor input, _axis_, shape_type_c result_shape): - cdef shape_type_c input_shape = input.shape - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) +cdef utils.dpnp_descriptor _dpnp_max(utils.dpnp_descriptor x1, _axis_, shape_type_c result_shape): + cdef shape_type_c x1_shape = x1.shape + cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MAX, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MAX_EXT, param1_type, param1_type) + + x1_obj = x1.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + 
device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_statistic_1in_1out_func_ptr_t_max func = kernel_data.ptr cdef shape_type_c axis @@ -185,19 +255,24 @@ cdef utils.dpnp_descriptor _dpnp_max(utils.dpnp_descriptor input, _axis_, shape_ axis_.push_back(shape_it) axis_size = len(axis) - func(input.get_data(), - result.get_data(), - result.size, - input_shape.data(), - input.ndim, - axis_.data(), - axis_size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + result.get_data(), + result.size, + x1_shape.data(), + x1.ndim, + axis_.data(), + axis_size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_max(utils.dpnp_descriptor input, axis): - cdef shape_type_c shape_input = input.shape +cpdef utils.dpnp_descriptor dpnp_max(utils.dpnp_descriptor x1, axis): + cdef shape_type_c x1_shape = x1.shape cdef shape_type_c output_shape if axis is None: @@ -206,35 +281,47 @@ cpdef utils.dpnp_descriptor dpnp_max(utils.dpnp_descriptor input, axis): else: if isinstance(axis, int): if axis < 0: - axis_ = tuple([input.ndim - axis]) + axis_ = tuple([x1.ndim + axis]) else: axis_ = tuple([axis]) else: _axis_ = [] for i in range(len(axis)): if axis[i] < 0: - _axis_.append(input.ndim - axis[i]) + _axis_.append(x1.ndim + axis[i]) else: _axis_.append(axis[i]) axis_ = tuple(_axis_) - output_shape.resize(len(shape_input) - len(axis_), 0) + output_shape.resize(len(x1_shape) - len(axis_), 0) ind = 0 - for id, shape_axis in enumerate(shape_input): + for id, shape_axis in enumerate(x1_shape): if id not in axis_: output_shape[ind] = shape_axis ind += 1 - return _dpnp_max(input, axis_, output_shape) + return _dpnp_max(x1, axis_, output_shape) -cpdef
utils.dpnp_descriptor _dpnp_mean(utils.dpnp_descriptor input): - cdef shape_type_c input_shape = input.shape - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) +cpdef utils.dpnp_descriptor _dpnp_mean(utils.dpnp_descriptor x1): + cdef shape_type_c x1_shape = x1.shape + cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) + + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MEAN_EXT, param1_type, param1_type) + + x1_obj = x1.get_array() - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MEAN, param1_type, param1_type) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), kernel_data.return_type, None) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_statistic_1in_1out_func_ptr_t func = kernel_data.ptr @@ -242,31 +329,36 @@ cpdef utils.dpnp_descriptor _dpnp_mean(utils.dpnp_descriptor input): cdef shape_type_c axis cdef Py_ssize_t axis_size = 0 - func(input.get_data(), - result.get_data(), - input_shape.data(), - input.ndim, - axis.data(), - axis_size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + result.get_data(), + x1_shape.data(), + x1.ndim, + axis.data(), + axis_size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef object dpnp_mean(utils.dpnp_descriptor input, axis): +cpdef object dpnp_mean(utils.dpnp_descriptor x1, axis): cdef shape_type_c output_shape if axis is None: - return _dpnp_mean(input).get_pyobj() + return _dpnp_mean(x1).get_pyobj() - cdef long size_input = input.size - cdef shape_type_c shape_input = input.shape + cdef long 
x1_size = x1.size + cdef shape_type_c x1_shape = x1.shape - if input.dtype == dpnp.float32: + if x1.dtype == dpnp.float32: res_type = dpnp.float32 else: res_type = dpnp.float64 - if size_input == 0: + if x1_size == 0: return dpnp.array([dpnp.nan], dtype=res_type) if isinstance(axis, int): @@ -277,9 +369,9 @@ cpdef object dpnp_mean(utils.dpnp_descriptor input, axis): if axis_ is None: output_shape.push_back(1) else: - output_shape = (0, ) * (len(shape_input) - len(axis_)) + output_shape = (0, ) * (len(x1_shape) - len(axis_)) ind = 0 - for id, shape_axis in enumerate(shape_input): + for id, shape_axis in enumerate(x1_shape): if id not in axis_: output_shape[ind] = shape_axis ind += 1 @@ -290,15 +382,15 @@ cpdef object dpnp_mean(utils.dpnp_descriptor input, axis): prod *= output_shape[i] result_array = [None] * prod - input_shape_offsets = [None] * len(shape_input) + input_shape_offsets = [None] * len(x1_shape) acc = 1 - for i in range(len(shape_input)): - ind = len(shape_input) - 1 - i + for i in range(len(x1_shape)): + ind = len(x1_shape) - 1 - i input_shape_offsets[ind] = acc - acc *= shape_input[ind] + acc *= x1_shape[ind] - output_shape_offsets = [None] * len(shape_input) + output_shape_offsets = [None] * len(x1_shape) acc = 1 if axis_ is not None: @@ -310,7 +402,7 @@ cpdef object dpnp_mean(utils.dpnp_descriptor input, axis): for i in axis_: result_offsets[i] = 0 - for source_idx in range(size_input): + for source_idx in range(x1_size): # reconstruct x,y,z from linear source_idx xyz = [] @@ -346,11 +438,11 @@ cpdef object dpnp_mean(utils.dpnp_descriptor input, axis): else: result_array[result_offset] += input_elem - del_ = size_input + del_ = x1_size if axis_ is not None: - for i in range(len(shape_input)): + for i in range(len(x1_shape)): if i not in axis_: - del_ = del_ / shape_input[i] + del_ = del_ / x1_shape[i] - dpnp_array = dpnp.array(result_array, dtype=input.dtype) + dpnp_array = dpnp.array(result_array, dtype=x1.dtype) dpnp_result_array = dpnp.reshape(dpnp_array, output_shape) return dpnp_result_array / del_
@@ -360,9 +452,21 @@ cpdef utils.dpnp_descriptor dpnp_median(utils.dpnp_descriptor array1): cdef shape_type_c x1_shape = array1.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MEDIAN, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MEDIAN_EXT, param1_type, param1_type) + + array1_obj = array1.get_array() + + cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), + kernel_data.return_type, + None, + device=array1_obj.sycl_device, + usm_type=array1_obj.usm_type, + sycl_queue=array1_obj.sycl_queue) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), kernel_data.return_type, None) + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_statistic_1in_1out_func_ptr_t func = kernel_data.ptr @@ -370,23 +474,40 @@ cpdef utils.dpnp_descriptor dpnp_median(utils.dpnp_descriptor array1): cdef shape_type_c axis cdef Py_ssize_t axis_size = 0 - func(array1.get_data(), - result.get_data(), - x1_shape.data(), - array1.ndim, - axis.data(), - axis_size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + array1.get_data(), + result.get_data(), + x1_shape.data(), + array1.ndim, + axis.data(), + axis_size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor _dpnp_min(utils.dpnp_descriptor input, _axis_, shape_type_c shape_output): - cdef shape_type_c input_shape = input.shape - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) +cpdef utils.dpnp_descriptor _dpnp_min(utils.dpnp_descriptor x1, _axis_, shape_type_c shape_output): + cdef shape_type_c x1_shape = x1.shape + cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData 
kernel_data = get_dpnp_function_ptr(DPNP_FN_MIN, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MIN_EXT, param1_type, param1_type) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(shape_output, kernel_data.return_type, None) + x1_obj = x1.get_array() + + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(shape_output, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_statistic_1in_1out_func_ptr_t_max func = kernel_data.ptr cdef shape_type_c axis @@ -402,19 +523,24 @@ cpdef utils.dpnp_descriptor _dpnp_min(utils.dpnp_descriptor input, _axis_, shape axis_.push_back(shape_it) axis_size = len(axis) - func(input.get_data(), - result.get_data(), - result.size, - input_shape.data(), - input.ndim, - axis_.data(), - axis_size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + result.get_data(), + result.size, + x1_shape.data(), + x1.ndim, + axis_.data(), + axis_size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_min(utils.dpnp_descriptor input, axis): - cdef shape_type_c shape_input = input.shape +cpdef utils.dpnp_descriptor dpnp_min(utils.dpnp_descriptor x1, axis): + cdef shape_type_c x1_shape = x1.shape cdef shape_type_c shape_output if axis is None: @@ -423,23 +549,23 @@ cpdef utils.dpnp_descriptor dpnp_min(utils.dpnp_descriptor input, axis): else: if isinstance(axis, int): if axis < 0: - axis_ = tuple([input.ndim - axis]) + axis_ = tuple([x1.ndim - axis]) else: axis_ = tuple([axis]) else: _axis_ = [] for i in range(len(axis)): if axis[i] < 0: - _axis_.append(input.ndim - axis[i]) + 
_axis_.append(x1.ndim - axis[i]) else: _axis_.append(axis[i]) axis_ = tuple(_axis_) - for id, shape_axis in enumerate(shape_input): + for id, shape_axis in enumerate(x1_shape): if id not in axis_: shape_output.push_back(shape_axis) - return _dpnp_min(input, axis_, shape_output) + return _dpnp_min(x1, axis_, shape_output) cpdef utils.dpnp_descriptor dpnp_nanvar(utils.dpnp_descriptor arr, ddof): @@ -449,22 +575,43 @@ cpdef utils.dpnp_descriptor dpnp_nanvar(utils.dpnp_descriptor arr, ddof): res_size = int(arr.size - n) cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_NANVAR, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_NANVAR_EXT, param1_type, param1_type) + + arr_obj = arr.get_array() # ceate result array with type given by FPTR data cdef shape_type_c result_shape = utils._object_to_tuple(res_size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=arr_obj.sycl_device, + usm_type=arr_obj.usm_type, + sycl_queue=arr_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_custom_nanvar_t func = kernel_data.ptr - func(arr.get_data(), mask_arr.get_data(), result.get_data(), result.size, arr.size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + arr.get_data(), + mask_arr.get_data(), + result.get_data(), + result.size, + arr.size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) - return call_fptr_custom_std_var_1in_1out(DPNP_FN_VAR, result, ddof) + return call_fptr_custom_std_var_1in_1out(DPNP_FN_VAR_EXT, result, ddof) cpdef utils.dpnp_descriptor 
dpnp_std(utils.dpnp_descriptor a, size_t ddof): - return call_fptr_custom_std_var_1in_1out(DPNP_FN_STD, a, ddof) + return call_fptr_custom_std_var_1in_1out(DPNP_FN_STD_EXT, a, ddof) cpdef utils.dpnp_descriptor dpnp_var(utils.dpnp_descriptor a, size_t ddof): - return call_fptr_custom_std_var_1in_1out(DPNP_FN_VAR, a, ddof) + return call_fptr_custom_std_var_1in_1out(DPNP_FN_VAR_EXT, a, ddof) diff --git a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx index 6edfb87042fb..bf9c4d5e0ed2 100644 --- a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx @@ -171,7 +171,14 @@ cpdef utils.dpnp_descriptor dpnp_unwrap(utils.dpnp_descriptor array1): if array1.dtype == dpnp.float32: result_type = dpnp.float32 - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(array1.shape, result_type, None) + array1_obj = array1.get_array() + + cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(array1.shape, + result_type, + None, + device=array1_obj.sycl_device, + usm_type=array1_obj.usm_type, + sycl_queue=array1_obj.sycl_queue) for i in range(result.size): val, = numpy.unwrap([array1.get_pyobj()[i]]) From 9a93e16a86380230d0f99eded225c07379adfdc8 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Mon, 21 Feb 2022 05:21:06 -0600 Subject: [PATCH 03/18] Add tests for compute follows data for math funcs --- .../include/dpnp_gen_1arg_1type_tbl.hpp | 14 +- .../include/dpnp_gen_1arg_2type_tbl.hpp | 56 ++--- dpnp/dpnp_algo/dpnp_algo.pyx | 14 +- dpnp/dpnp_algo/dpnp_algo_mathematical.pyx | 4 +- dpnp/dpnp_iface_arraycreation.py | 2 +- dpnp/dpnp_iface_mathematical.py | 134 +++++----- tests/skipped_tests.tbl | 10 + tests/skipped_tests_gpu.tbl | 10 + tests/test_sycl_queue.py | 238 +++++++++++++++++- 9 files changed, 369 insertions(+), 113 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp index 
dd1ca998d5b9..f5ee23d755f2 100644 --- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp @@ -90,20 +90,20 @@ #endif -MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), DPNP_QUEUE.submit(kernel_func)) -MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, DPNP_QUEUE.submit(kernel_func)) +MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), q.submit(kernel_func)) +MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, q.submit(kernel_func)) MACRO_1ARG_1TYPE_OP(dpnp_erf_c, sycl::erf((double)input_elem), - oneapi::mkl::vm::erf(DPNP_QUEUE, input1_size, input1_data, result)) // no sycl::erf for int and long -MACRO_1ARG_1TYPE_OP(dpnp_negative_c, -input_elem, DPNP_QUEUE.submit(kernel_func)) + oneapi::mkl::vm::erf(q, input1_size, input1_data, result)) // no sycl::erf for int and long +MACRO_1ARG_1TYPE_OP(dpnp_negative_c, -input_elem, q.submit(kernel_func)) MACRO_1ARG_1TYPE_OP(dpnp_recip_c, _DataType(1) / input_elem, - DPNP_QUEUE.submit(kernel_func)) // error: no member named 'recip' in namespace 'sycl' + q.submit(kernel_func)) // error: no member named 'recip' in namespace 'sycl' MACRO_1ARG_1TYPE_OP(dpnp_sign_c, sycl::sign((double)input_elem), - DPNP_QUEUE.submit(kernel_func)) // no sycl::sign for int and long + q.submit(kernel_func)) // no sycl::sign for int and long MACRO_1ARG_1TYPE_OP(dpnp_square_c, input_elem* input_elem, - oneapi::mkl::vm::sqr(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::sqr(q, input1_size, input1_data, result)) #undef MACRO_1ARG_1TYPE_OP diff --git a/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp index bfd375b807e1..71caeef9111a 100644 --- a/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp @@ -92,81 +92,81 @@ MACRO_1ARG_2TYPES_OP(dpnp_acos_c, sycl::acos(input_elem), - oneapi::mkl::vm::acos(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::acos(q, 
input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_acosh_c, sycl::acosh(input_elem), - oneapi::mkl::vm::acosh(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::acosh(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_asin_c, sycl::asin(input_elem), - oneapi::mkl::vm::asin(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::asin(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_asinh_c, sycl::asinh(input_elem), - oneapi::mkl::vm::asinh(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::asinh(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_atan_c, sycl::atan(input_elem), - oneapi::mkl::vm::atan(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::atan(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_atanh_c, sycl::atanh(input_elem), - oneapi::mkl::vm::atanh(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::atanh(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_cbrt_c, sycl::cbrt(input_elem), - oneapi::mkl::vm::cbrt(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::cbrt(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_ceil_c, sycl::ceil(input_elem), - oneapi::mkl::vm::ceil(DPNP_QUEUE, input1_size, input1_data, result)) -MACRO_1ARG_2TYPES_OP(dpnp_copyto_c, input_elem, DPNP_QUEUE.submit(kernel_func)) + oneapi::mkl::vm::ceil(q, input1_size, input1_data, result)) +MACRO_1ARG_2TYPES_OP(dpnp_copyto_c, input_elem, q.submit(kernel_func)) MACRO_1ARG_2TYPES_OP(dpnp_cos_c, sycl::cos(input_elem), - oneapi::mkl::vm::cos(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::cos(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_cosh_c, sycl::cosh(input_elem), - oneapi::mkl::vm::cosh(DPNP_QUEUE, input1_size, input1_data, result)) -MACRO_1ARG_2TYPES_OP(dpnp_degrees_c, sycl::degrees(input_elem), DPNP_QUEUE.submit(kernel_func)) + oneapi::mkl::vm::cosh(q, input1_size, 
input1_data, result)) +MACRO_1ARG_2TYPES_OP(dpnp_degrees_c, sycl::degrees(input_elem), q.submit(kernel_func)) MACRO_1ARG_2TYPES_OP(dpnp_exp2_c, sycl::exp2(input_elem), - oneapi::mkl::vm::exp2(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::exp2(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_exp_c, sycl::exp(input_elem), - oneapi::mkl::vm::exp(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::exp(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_expm1_c, sycl::expm1(input_elem), - oneapi::mkl::vm::expm1(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::expm1(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_fabs_c, sycl::fabs(input_elem), - oneapi::mkl::vm::abs(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::abs(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_floor_c, sycl::floor(input_elem), - oneapi::mkl::vm::floor(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::floor(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_log10_c, sycl::log10(input_elem), - oneapi::mkl::vm::log10(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::log10(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_log1p_c, sycl::log1p(input_elem), - oneapi::mkl::vm::log1p(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::log1p(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_log2_c, sycl::log2(input_elem), - oneapi::mkl::vm::log2(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::log2(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_log_c, sycl::log(input_elem), - oneapi::mkl::vm::ln(DPNP_QUEUE, input1_size, input1_data, result)) -MACRO_1ARG_2TYPES_OP(dpnp_radians_c, sycl::radians(input_elem), DPNP_QUEUE.submit(kernel_func)) + oneapi::mkl::vm::ln(q, input1_size, input1_data, result)) +MACRO_1ARG_2TYPES_OP(dpnp_radians_c, sycl::radians(input_elem), 
q.submit(kernel_func)) MACRO_1ARG_2TYPES_OP(dpnp_sin_c, sycl::sin(input_elem), - oneapi::mkl::vm::sin(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::sin(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_sinh_c, sycl::sinh(input_elem), - oneapi::mkl::vm::sinh(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::sinh(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_sqrt_c, sycl::sqrt(input_elem), - oneapi::mkl::vm::sqrt(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::sqrt(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_tan_c, sycl::tan(input_elem), - oneapi::mkl::vm::tan(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::tan(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_tanh_c, sycl::tanh(input_elem), - oneapi::mkl::vm::tanh(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::tanh(q, input1_size, input1_data, result)) MACRO_1ARG_2TYPES_OP(dpnp_trunc_c, sycl::trunc(input_elem), - oneapi::mkl::vm::trunc(DPNP_QUEUE, input1_size, input1_data, result)) + oneapi::mkl::vm::trunc(q, input1_size, input1_data, result)) #undef MACRO_1ARG_2TYPES_OP diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 5f30dbffbd95..18ab2041d00e 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -379,6 +379,8 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out(DPNPFuncName fptr_name, result = out + utils.get_common_usm_allocation(x1, result) # check USM allocation is common + result_sycl_queue = result.get_array().sycl_queue cdef c_dpctl.SyclQueue q = result_sycl_queue @@ -432,6 +434,8 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name, result = out + utils.get_common_usm_allocation(x1, result) # check USM allocation is common + result_sycl_queue = result.get_array().sycl_queue cdef c_dpctl.SyclQueue q = result_sycl_queue @@ -484,9 +488,10 @@ cdef utils.dpnp_descriptor 
call_fptr_2in_1out(DPNPFuncName fptr_name, cdef shape_type_c result_shape = utils.get_common_shape(x1_shape, x2_shape) cdef utils.dpnp_descriptor result + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj) + if out is None: """ Create result array with type given by FPTR data """ - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj) result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None, @@ -501,6 +506,8 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out(DPNPFuncName fptr_name, result = out + utils.get_common_usm_allocation(x1_obj, result) # check USM allocation is common + cdef c_dpctl.SyclQueue q = result_sycl_queue cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() @@ -551,9 +558,10 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, cdef shape_type_c result_shape = utils.get_common_shape(x1_shape, x2_shape) cdef utils.dpnp_descriptor result + result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj) + if out is None: """ Create result array with type given by FPTR data """ - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj) result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None, @@ -568,6 +576,8 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, result = out + utils.get_common_usm_allocation(x1_obj, result) # check USM allocation is common + cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) result_obj = result.get_array() diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx index 142163054bcd..5d937a7008bb 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx @@ -451,7 +451,7 @@ cpdef utils.dpnp_descriptor 
dpnp_nancumprod(utils.dpnp_descriptor x1): if dpnp.isnan(cur_x1_flatiter[i]): cur_x1_flatiter[i] = 1 - x1_desc = dpnp.get_dpnp_descriptor(cur_x1) + x1_desc = dpnp.get_dpnp_descriptor(cur_x1, copy_when_nondefault_queue=False) return dpnp_cumprod(x1_desc) @@ -464,7 +464,7 @@ cpdef utils.dpnp_descriptor dpnp_nancumsum(utils.dpnp_descriptor x1): if dpnp.isnan(cur_x1_flatiter[i]): cur_x1_flatiter[i] = 0 - x1_desc = dpnp.get_dpnp_descriptor(cur_x1) + x1_desc = dpnp.get_dpnp_descriptor(cur_x1, copy_when_nondefault_queue=False) return dpnp_cumsum(x1_desc) diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 65891b1c1ec2..cf0d5087d702 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -390,7 +390,7 @@ def copy(x1, order='K', subok=False): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: if order != 'K': pass diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 5a9ad98d3167..d4515b43cd3a 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -142,7 +142,7 @@ def absolute(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: if not x1_desc.ndim: pass @@ -181,8 +181,8 @@ def add(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -202,7 
+202,7 @@ def add(x1, x2, dtype=None, out=None, where=True, **kwargs): elif not where: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_add(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() return call_origin(numpy.add, x1, x2, dtype=dtype, out=out, where=where, **kwargs) @@ -237,7 +237,7 @@ def around(x1, decimals=0, out=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if out is not None: pass @@ -277,9 +277,9 @@ def ceil(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and not kwargs: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_ceil(x1_desc, out_desc).get_pyobj() return call_origin(numpy.ceil, x1, out=out, **kwargs) @@ -307,7 +307,7 @@ def conjugate(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_conjugate(x1_desc).get_pyobj() @@ -359,8 +359,8 @@ def copysign(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not 
x1_desc and not x1_is_scalar: @@ -411,8 +411,8 @@ def cross(x1, x2, axisa=-1, axisb=-1, axisc=-1, axis=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if x1_desc and x2_desc: if x1_desc.size != 3 or x2_desc.size != 3: @@ -460,7 +460,7 @@ def cumprod(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_cumprod(x1_desc).get_pyobj() @@ -494,7 +494,7 @@ def cumsum(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_cumsum(x1_desc).get_pyobj() @@ -514,7 +514,7 @@ def diff(x1, n=1, axis=-1, prepend=numpy._NoValue, append=numpy._NoValue): Otherwise the function will be executed sequentially on CPU. 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not isinstance(n, int): pass @@ -524,9 +524,9 @@ def diff(x1, n=1, axis=-1, prepend=numpy._NoValue, append=numpy._NoValue): pass elif axis != -1: pass - elif prepend is not None: + elif prepend is not numpy._NoValue: pass - elif append is not None: + elif append is not numpy._NoValue: pass else: return dpnp_diff(x1_desc, n).get_pyobj() @@ -559,8 +559,8 @@ def divide(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -614,7 +614,7 @@ def ediff1d(x1, to_end=None, to_begin=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if to_begin is not None: pass @@ -650,7 +650,7 @@ def fabs(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_fabs(x1_desc).get_pyobj() @@ -690,9 +690,9 @@ def floor(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and not kwargs: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_floor(x1_desc, 
out_desc).get_pyobj() return call_origin(numpy.floor, x1, out=out, **kwargs) @@ -730,8 +730,8 @@ def floor_divide(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -747,6 +747,7 @@ def floor_divide(x1, x2, dtype=None, out=None, where=True, **kwargs): elif x2_is_scalar and not x2_desc: pass elif x1_desc and x2_desc and x1_desc.size != x2_desc.size: + # TODO: enable broadcasting pass elif x1_desc and x2_desc and x1_desc.shape != x2_desc.shape: pass @@ -759,7 +760,7 @@ def floor_divide(x1, x2, dtype=None, out=None, where=True, **kwargs): elif x1_is_scalar and x2_desc.ndim > 1: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_floor_divide(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() return call_origin(numpy.floor_divide, x1, x2, out=out, where=where, dtype=dtype, **kwargs) @@ -839,8 +840,8 @@ def fmod(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -860,7 +861,7 @@ def fmod(x1, x2, dtype=None, out=None, where=True, **kwargs): elif not where: pass else: - out_desc = 
dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_fmod(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() return call_origin(numpy.fmod, x1, x2, dtype=dtype, out=out, where=where, **kwargs) @@ -893,7 +894,7 @@ def gradient(x1, *varargs, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: if len(varargs) > 1: pass @@ -939,8 +940,8 @@ def maximum(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -996,8 +997,8 @@ def minimum(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -1067,7 +1068,7 @@ def modf(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_modf(x1_desc) @@ -1100,8 +1101,8 @@ def multiply(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = 
dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x2_desc and not x2_is_scalar: @@ -1154,7 +1155,7 @@ def nancumprod(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_nancumprod(x1_desc).get_pyobj() @@ -1190,7 +1191,7 @@ def nancumsum(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_nancumsum(x1_desc).get_pyobj() @@ -1220,7 +1221,7 @@ def nanprod(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_nanprod(x1_desc).get_pyobj() @@ -1250,7 +1251,7 @@ def nansum(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: result_obj = dpnp_nansum(x1_desc).get_pyobj() result = dpnp.convert_single_elem_array_to_scalar(result_obj) @@ -1283,7 +1284,7 @@ def negative(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_negative(x1_desc).get_pyobj() @@ -1324,8 +1325,8 @@ def power(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc 
= dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -1343,7 +1344,7 @@ def power(x1, x2, dtype=None, out=None, where=True, **kwargs): elif not where: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_power(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() return call_origin(numpy.power, x1, x2, dtype=dtype, out=out, where=where, **kwargs) @@ -1370,12 +1371,12 @@ def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, wher """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if where is not True: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None result_obj = dpnp_prod(x1_desc, axis, dtype, out_desc, keepdims, initial, where).get_pyobj() result = dpnp.convert_single_elem_array_to_scalar(result_obj, keepdims) @@ -1416,8 +1417,8 @@ def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -1433,6 +1434,7 @@ def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs): elif x2_is_scalar and not x2_desc: pass elif x1_desc 
and x2_desc and x1_desc.size != x2_desc.size: + # TODO: enable broadcasting pass elif x1_desc and x2_desc and x1_desc.shape != x2_desc.shape: pass @@ -1445,7 +1447,7 @@ def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs): elif x1_is_scalar and x2_desc.ndim > 1: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_remainder(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() return call_origin(numpy.remainder, x1, x2, out=out, where=where, dtype=dtype, **kwargs) @@ -1488,7 +1490,7 @@ def sign(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_sign(x1_desc).get_pyobj() @@ -1520,8 +1522,8 @@ def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -1545,7 +1547,7 @@ def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs): elif not where: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() return call_origin(numpy.subtract, x1, x2, dtype=dtype, out=out, where=where, **kwargs) @@ -1572,12 +1574,12 @@ def sum(x1, axis=None, 
dtype=None, out=None, keepdims=False, initial=None, where """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if where is not True: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None result_obj = dpnp_sum(x1_desc, axis, dtype, out_desc, keepdims, initial, where).get_pyobj() result = dpnp.convert_single_elem_array_to_scalar(result_obj, keepdims) @@ -1613,13 +1615,23 @@ def trapz(y1, x1=None, dx=1.0, axis=-1): """ - y_desc = dpnp.get_dpnp_descriptor(y1) + y_desc = dpnp.get_dpnp_descriptor(y1, copy_when_nondefault_queue=False) if y_desc: if y_desc.ndim > 1: pass else: - x_obj = dpnp.empty(y_desc.shape, dtype=y_desc.dtype) if x1 is None else x1 - x_desc = dpnp.get_dpnp_descriptor(x_obj) + y_obj = y_desc.get_array() + if x1 is None: + x_obj = dpnp.empty(y_desc.shape, + dtype=y_desc.dtype, + device=y_obj.sycl_device, + usm_type=y_obj.usm_type, + sycl_queue=y_obj.sycl_queue) + else: + x_obj = x1 + + x_desc = dpnp.get_dpnp_descriptor(x_obj, copy_when_nondefault_queue=False) + # TODO: change to "not x_desc" if x_desc: pass elif y_desc.size != x_desc.size: @@ -1680,9 +1692,9 @@ def trunc(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and not kwargs: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_trunc(x1_desc, out_desc).get_pyobj() return call_origin(numpy.trunc, x1, out=out, **kwargs) diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index f1f078896dd2..5692e09e014b 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -1,3 +1,13 @@ 
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] +tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25] +tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22] +tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-remainder-data15-data25] + tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fft2 diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 29fd403f750b..833fc84c6b25 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,3 +1,13 @@ +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] +tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25] +tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22] +tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-remainder-data15-data25] + tests/test_indexing.py::test_nonzero[[[1, 
0], [1, 0]]] tests/test_indexing.py::test_nonzero[[[1, 2], [3, 4]]] tests/test_indexing.py::test_nonzero[[[0, 1, 2], [3, 0, 5], [6, 7, 0]]] diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 66fbfb786f89..00844f048db9 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -34,20 +34,127 @@ def assert_sycl_queue_equal(result, expected): assert exec_queue is not None +@pytest.mark.parametrize( + "func,data", + [ + pytest.param("abs", + [-1.2, 1.2]), + pytest.param("ceil", + [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), + pytest.param("conjugate", + [[1.+1.j, 0.], [0., 1.+1.j]]), + pytest.param("copy", + [1., 2., 3.]), + pytest.param("cumprod", + [[1., 2., 3.], [4., 5., 6.]]), + pytest.param("cumsum", + [[1., 2., 3.], [4., 5., 6.]]), + pytest.param("diff", + [1., 2., 4., 7., 0.]), + pytest.param("ediff1d", + [1., 2., 4., 7., 0.]), + pytest.param("fabs", + [-1.2, 1.2]), + pytest.param("floor", + [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), + pytest.param("gradient", + [1., 2., 4., 7., 11., 16.]), + pytest.param("nancumprod", + [1., dpnp.nan]), + pytest.param("nancumsum", + [1., dpnp.nan]), + pytest.param("nanprod", + [1., dpnp.nan]), + pytest.param("nansum", + [1., dpnp.nan]), + pytest.param("negative", + [1., -1.]), + pytest.param("prod", + [1., 2.]), + pytest.param("sign", + [-5., 4.5]), + pytest.param("sum", + [1., 2.]), + pytest.param("trapz", + [[0., 1., 2.], [3., 4., 5.]]), + pytest.param("trunc", + [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), + ], +) @pytest.mark.parametrize("device", valid_devices, ids=[device.filter_string for device in valid_devices]) -def test_matmul(device): - data1 = [[1., 1., 1.], [1., 1., 1.]] - data2 = [[1., 1.], [1., 1.], [1., 1.]] +def test_1in_1out(func, data, device): + x_orig = numpy.array(data) + expected = getattr(numpy, func)(x_orig) + x = dpnp.array(data, device=device) + result = getattr(dpnp, func)(x) + + numpy.testing.assert_array_equal(result, expected) + + expected_queue = x.get_array().sycl_queue + 
result_queue = result.get_array().sycl_queue + + assert_sycl_queue_equal(result_queue, expected_queue) + assert result_queue.sycl_device == expected_queue.sycl_device + + +@pytest.mark.parametrize( + "func,data1,data2", + [ + pytest.param("add", + [0., 1., 2., 3., 4., 5., 6., 7., 8.], + [0., 1., 2., 0., 1., 2., 0., 1., 2.]), + pytest.param("copysign", + [0., 1., 2.], + [-1., 0., 1.]), + pytest.param("cross", + [1., 2., 3.], + [4., 5., 6.]), + pytest.param("divide", + [0., 1., 2., 3., 4.], + [4., 4., 4., 4., 4.]), + pytest.param("floor_divide", + [1., 2., 3., 4.], + [2.5, 2.5, 2.5, 2.5]), + pytest.param("fmod", + [-3., -2., -1., 1., 2., 3.], + [2., 2., 2., 2., 2., 2.]), + pytest.param("maximum", + [2., 3., 4.], + [1., 5., 2.]), + pytest.param("minimum", + [2., 3., 4.], + [1., 5., 2.]), + pytest.param("multiply", + [0., 1., 2., 3., 4., 5., 6., 7., 8.], + [0., 1., 2., 0., 1., 2., 0., 1., 2.]), + pytest.param("power", + [0., 1., 2., 3., 4., 5.], + [1., 2., 3., 3., 2., 1.]), + pytest.param("remainder", + [0., 1., 2., 3., 4., 5., 6.], + [5., 5., 5., 5., 5., 5., 5.]), + pytest.param("subtract", + [0., 1., 2., 3., 4., 5., 6., 7., 8.], + [0., 1., 2., 0., 1., 2., 0., 1., 2.]), + pytest.param("matmul", + [[1., 0.], [0., 1.]], + [[4., 1.], [1., 2.]]), + ], +) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_2in_1out(func, data1, data2, device): x1_orig = numpy.array(data1) x2_orig = numpy.array(data2) - expected = numpy.matmul(x1_orig, x2_orig) + expected = getattr(numpy, func)(x1_orig, x2_orig) x1 = dpnp.array(data1, device=device) x2 = dpnp.array(data2, device=device) - result = dpnp.matmul(x1, x2) + result = getattr(dpnp, func)(x1, x2) numpy.testing.assert_array_equal(result, expected) @@ -58,16 +165,36 @@ def test_matmul(device): assert result_queue.sycl_device == expected_queue.sycl_device - -@pytest.mark.parametrize("func", - []) +@pytest.mark.parametrize( + "func,data1,data2", + [ + 
pytest.param("add", + [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]], + [0., 1., 2.]), + pytest.param("divide", + [0., 1., 2., 3., 4.], + [4.]), + pytest.param("floor_divide", + [1., 2., 3., 4.], + [2.5]), + pytest.param("fmod", + [-3., -2., -1., 1., 2., 3.], + [2.]), + pytest.param("multiply", + [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]], + [0., 1., 2.]), + pytest.param("remainder", + [0., 1., 2., 3., 4., 5., 6.], + [5.]), + pytest.param("subtract", + [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]], + [0., 1., 2.]), + ], +) @pytest.mark.parametrize("device", valid_devices, ids=[device.filter_string for device in valid_devices]) -def test_2in_1out(func, device): - data1 = [1., 1., 1., 1., 1.] - data2 = [1., 2., 3., 4., 5.] - +def test_broadcasting(func, data1, data2, device): x1_orig = numpy.array(data1) x2_orig = numpy.array(data2) expected = getattr(numpy, func)(x1_orig, x2_orig) @@ -83,3 +210,90 @@ def test_2in_1out(func, device): assert_sycl_queue_equal(result_queue, expected_queue) assert result_queue.sycl_device == expected_queue.sycl_device + + +@pytest.mark.parametrize( + "func,data1,data2", + [ + pytest.param("add", + [0., 1., 2., 3., 4., 5., 6., 7., 8.], + [0., 1., 2., 0., 1., 2., 0., 1., 2.]), + pytest.param("copysign", + [0., 1., 2.], + [-1., 0., 1.]), + pytest.param("divide", + [0., 1., 2., 3., 4.], + [4., 4., 4., 4., 4.]), + pytest.param("floor_divide", + [1., 2., 3., 4.], + [2.5, 2.5, 2.5, 2.5]), + pytest.param("fmod", + [-3., -2., -1., 1., 2., 3.], + [2., 2., 2., 2., 2., 2.]), + pytest.param("maximum", + [2., 3., 4.], + [1., 5., 2.]), + pytest.param("minimum", + [2., 3., 4.], + [1., 5., 2.]), + pytest.param("multiply", + [0., 1., 2., 3., 4., 5., 6., 7., 8.], + [0., 1., 2., 0., 1., 2., 0., 1., 2.]), + pytest.param("power", + [0., 1., 2., 3., 4., 5.], + [1., 2., 3., 3., 2., 1.]), + pytest.param("remainder", + [0., 1., 2., 3., 4., 5., 6.], + [5., 5., 5., 5., 5., 5., 5.]), + pytest.param("subtract", + [0., 1., 2., 3., 4., 5., 6., 7., 8.], + [0., 1., 2., 0., 1., 
2., 0., 1., 2.]), + ], +) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_out(func, data1, data2, device): + x1_orig = numpy.array(data1) + x2_orig = numpy.array(data2) + expected = numpy.empty(x1_orig.size) + numpy.add(x1_orig, x2_orig, out=expected) + + x1 = dpnp.array(data1, device=device) + x2 = dpnp.array(data2, device=device) + result = dpnp.empty(x1.size, device=device) + dpnp.add(x1, x2, out=result) + + numpy.testing.assert_array_equal(result, expected) + + expected_queue = x1.get_array().sycl_queue + result_queue = result.get_array().sycl_queue + + assert_sycl_queue_equal(result_queue, expected_queue) + assert result_queue.sycl_device == expected_queue.sycl_device + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_modf(device): + data = [0, 3.5] + + x_orig = numpy.array(data) + expected1, expected2 = numpy.modf(x_orig) + + x = dpnp.array(data, device=device) + result1, result2 = dpnp.modf(x) + + numpy.testing.assert_array_equal(result1, expected1) + numpy.testing.assert_array_equal(result2, expected2) + + expected_queue = x.get_array().sycl_queue + result1_queue = result1.get_array().sycl_queue + result2_queue = result2.get_array().sycl_queue + + assert_sycl_queue_equal(result1_queue, expected_queue) + assert_sycl_queue_equal(result2_queue, expected_queue) + + assert result1_queue.sycl_device == expected_queue.sycl_device + assert result2_queue.sycl_device == expected_queue.sycl_device From 9f7d19958d19e8228115659e94384c6c77d81431 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Tue, 22 Feb 2022 11:56:05 -0600 Subject: [PATCH 04/18] Add copy_when_nondefault_queue=False to call of get_dpnp_descriptor --- dpnp/dpnp_algo/dpnp_algo_manipulation.pyx | 3 +- dpnp/dpnp_algo/dpnp_algo_statistics.pyx | 3 +- dpnp/dpnp_iface_arraycreation.py | 20 +++--- dpnp/dpnp_iface_bitwise.py | 12 ++-- dpnp/dpnp_iface_counting.py | 2 
+- dpnp/dpnp_iface_indexing.py | 44 ++++++------- dpnp/dpnp_iface_linearalgebra.py | 22 +++---- dpnp/dpnp_iface_logic.py | 8 +-- dpnp/dpnp_iface_manipulation.py | 28 ++++---- dpnp/dpnp_iface_searching.py | 4 +- dpnp/dpnp_iface_sorting.py | 10 +-- dpnp/dpnp_iface_statistics.py | 24 +++---- dpnp/dpnp_iface_trigonometric.py | 78 +++++++++++------------ 13 files changed, 130 insertions(+), 128 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx index b2b0c4f9068e..0a51b44e7e52 100644 --- a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx @@ -227,7 +227,8 @@ cpdef utils.dpnp_descriptor dpnp_reshape(utils.dpnp_descriptor array1, newshape, order=order, device=array1_obj.sycl_device, usm_type=array1_obj.usm_type, - sycl_queue=array1_obj.sycl_queue)) + sycl_queue=array1_obj.sycl_queue), + copy_when_nondefault_queue=False) cpdef utils.dpnp_descriptor dpnp_transpose(utils.dpnp_descriptor array1, axes=None): diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx index 872209df371f..920068c7d631 100644 --- a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx @@ -570,7 +570,8 @@ cpdef utils.dpnp_descriptor dpnp_min(utils.dpnp_descriptor x1, axis): cpdef utils.dpnp_descriptor dpnp_nanvar(utils.dpnp_descriptor arr, ddof): # dpnp_isnan does not support USM array as input in comparison to dpnp.isnan - cdef utils.dpnp_descriptor mask_arr = dpnp.get_dpnp_descriptor(dpnp.isnan(arr.get_pyobj())) + cdef utils.dpnp_descriptor mask_arr = dpnp.get_dpnp_descriptor(dpnp.isnan(arr.get_pyobj()), + copy_when_nondefault_queue=False) n = dpnp.count_nonzero(mask_arr.get_pyobj()) res_size = int(arr.size - n) cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype) diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index cf0d5087d702..44c7b88c3c78 100644 --- 
a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -431,7 +431,7 @@ def diag(x1, k=0): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not isinstance(k, int): pass @@ -465,10 +465,10 @@ def diagflat(x1, k=0): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: input_ravel = dpnp.ravel(x1) - input_ravel_desc = dpnp.get_dpnp_descriptor(input_ravel) + input_ravel_desc = dpnp.get_dpnp_descriptor(input_ravel, copy_when_nondefault_queue=False) return dpnp_diag(input_ravel_desc, k).get_pyobj() @@ -1138,7 +1138,7 @@ def ones_like(x1, dtype=None, order='C', subok=False, shape=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if order not in ('C', 'c', None): pass @@ -1164,7 +1164,7 @@ def ptp(arr, axis=None, out=None, keepdims=numpy._NoValue): Input array is supported as :obj:`dpnp.ndarray`. Parameters ``out`` and ``keepdims`` are supported only with default values. """ - arr_desc = dpnp.get_dpnp_descriptor(arr) + arr_desc = dpnp.get_dpnp_descriptor(arr, copy_when_nondefault_queue=False) if not arr_desc: pass elif axis is not None and not isinstance(axis, int): @@ -1194,7 +1194,7 @@ def trace(x1, offset=0, axis1=0, axis2=1, dtype=None, out=None): Parameters ``axis1``, ``axis2``, ``out`` and ``dtype`` are supported only with default values. 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.size == 0: pass @@ -1271,7 +1271,7 @@ def tril(x1, k=0): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not isinstance(k, int): pass @@ -1301,7 +1301,7 @@ def triu(x1, k=0): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not isinstance(k, int): pass @@ -1340,7 +1340,7 @@ def vander(x1, N=None, increasing=False): [ 1, 5, 25, 125]]) """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if x1.ndim != 1: pass @@ -1425,7 +1425,7 @@ def zeros_like(x1, dtype=None, order='C', subok=False, shape=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if order not in ('C', 'c', None): pass diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py index 51c41504d421..51a28b0464ea 100644 --- a/dpnp/dpnp_iface_bitwise.py +++ b/dpnp/dpnp_iface_bitwise.py @@ -63,10 +63,10 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and 
not kwargs: if not x1_desc and not x1_is_scalar: @@ -90,7 +90,7 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T elif not where: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_func(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs) @@ -228,7 +228,7 @@ def invert(x, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x) + x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_invert(x1_desc).get_pyobj() diff --git a/dpnp/dpnp_iface_counting.py b/dpnp/dpnp_iface_counting.py index 6ea5b1c6eaf9..9f14e3f36bfb 100644 --- a/dpnp/dpnp_iface_counting.py +++ b/dpnp/dpnp_iface_counting.py @@ -75,7 +75,7 @@ def count_nonzero(x1, axis=None, *, keepdims=False): 5 """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axis is not None: pass diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index fa5204650d03..6ff554d89d3a 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -81,11 +81,11 @@ def choose(x1, choices, out=None, mode='raise'): -------- :obj:`take_along_axis` : Preferable if choices is an array. 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) choices_list = [] for choice in choices: - choices_list.append(dpnp.get_dpnp_descriptor(choice)) + choices_list.append(dpnp.get_dpnp_descriptor(choice, copy_when_nondefault_queue=False)) if x1_desc: if any(not desc for desc in choices_list): @@ -186,7 +186,7 @@ def diag_indices_from(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: # original limitation if not x1_desc.ndim >= 2: @@ -216,7 +216,7 @@ def diagonal(x1, offset=0, axis1=0, axis2=1): Otherwise the function will be executed sequentially on CPU. """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not isinstance(offset, int): pass @@ -248,7 +248,7 @@ def fill_diagonal(x1, val, wrap=False): :obj:`dpnp.diag_indices_from` : Return the indices to access the main diagonal of an n-dimensional array. """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: if not dpnp.isscalar(val): pass @@ -329,7 +329,7 @@ def nonzero(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: return dpnp_nonzero(x1_desc) @@ -347,9 +347,9 @@ def place(x1, mask, vals): Parameter ``vals`` is supported as 1-D sequence. 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1) - mask_desc = dpnp.get_dpnp_descriptor(mask) - vals_desc = dpnp.get_dpnp_descriptor(vals) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + mask_desc = dpnp.get_dpnp_descriptor(mask, copy_when_nondefault_queue=False) + vals_desc = dpnp.get_dpnp_descriptor(vals, copy_when_nondefault_queue=False) if x1_desc and mask_desc and vals_desc: return dpnp_place(x1_desc, mask, vals_desc) @@ -367,7 +367,7 @@ def put(x1, ind, v, mode='raise'): Not supported parameter mode. """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: if mode != 'raise': pass @@ -391,9 +391,9 @@ def put_along_axis(x1, indices, values, axis): :obj:`take_along_axis` : Take values from the input array by matching 1d index and data slices. """ - x1_desc = dpnp.get_dpnp_descriptor(x1) - indices_desc = dpnp.get_dpnp_descriptor(indices) - values_desc = dpnp.get_dpnp_descriptor(values) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + indices_desc = dpnp.get_dpnp_descriptor(indices, copy_when_nondefault_queue=False) + values_desc = dpnp.get_dpnp_descriptor(values, copy_when_nondefault_queue=False) if x1_desc and indices_desc and values_desc: if x1_desc.ndim != indices_desc.ndim: pass @@ -419,9 +419,9 @@ def putmask(x1, mask, values): Input arrays ``arr``, ``mask`` and ``values`` are supported as :obj:`dpnp.ndarray`. 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - mask_desc = dpnp.get_dpnp_descriptor(mask) - values_desc = dpnp.get_dpnp_descriptor(values) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + mask_desc = dpnp.get_dpnp_descriptor(mask, copy_when_nondefault_queue=False) + values_desc = dpnp.get_dpnp_descriptor(values, copy_when_nondefault_queue=False) if x1_desc and mask_desc and values_desc: return dpnp_putmask(x1_desc, mask_desc, values_desc) @@ -477,8 +477,8 @@ def take(x1, indices, axis=None, out=None, mode='raise'): :obj:`take_along_axis` : Take elements by matching the array and the index arrays. """ - x1_desc = dpnp.get_dpnp_descriptor(x1) - indices_desc = dpnp.get_dpnp_descriptor(indices) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + indices_desc = dpnp.get_dpnp_descriptor(indices, copy_when_nondefault_queue=False) if x1_desc and indices_desc: if axis is not None: pass @@ -503,8 +503,8 @@ def take_along_axis(x1, indices, axis): :obj:`put_along_axis` : Put values into the destination array by matching 1d index and data slices. """ - x1_desc = dpnp.get_dpnp_descriptor(x1) - indices_desc = dpnp.get_dpnp_descriptor(indices) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + indices_desc = dpnp.get_dpnp_descriptor(indices, copy_when_nondefault_queue=False) if x1_desc and indices_desc: if x1_desc.ndim != indices_desc.ndim: pass @@ -578,7 +578,7 @@ def tril_indices_from(x1, k=0): Diagonal offset (see `tril` for details). """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if isinstance(k, int): return dpnp_tril_indices_from(x1_desc, k) @@ -635,7 +635,7 @@ def triu_indices_from(x1, k=0): Diagonal offset (see `tril` for details). 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if isinstance(k, int): return dpnp_triu_indices_from(x1_desc, k) diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index cf0e0ed2a212..7cd76d7c22b5 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -91,8 +91,8 @@ def dot(x1, x2, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: # TODO: remove fallback with scalars when muliply backend func will support strides if(x1_desc.ndim == 0 and x2_desc.strides is not None @@ -183,8 +183,8 @@ def inner(x1, x2, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: return dpnp_inner(x1_desc, x2_desc).get_pyobj() @@ -201,8 +201,8 @@ def kron(x1, x2): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if x1_desc and x2_desc: return dpnp_kron(x1_desc, x2_desc).get_pyobj() @@ -277,7 +277,7 @@ def matmul(x1, x2, out=None, **kwargs): if (array1_size > cost_size) and (array2_size > cost_size): return dpnp_matmul(x1_desc, x2_desc, out) else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is 
not None else None return dpnp_matmul(x1_desc, x2_desc, out_desc).get_pyobj() return call_origin(numpy.matmul, x1, x2, out=out, **kwargs) @@ -312,8 +312,8 @@ def outer(x1, x2, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: return dpnp_outer(x1_desc, x2_desc).get_pyobj() @@ -350,8 +350,8 @@ def tensordot(x1, x2, axes=2): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if x1_desc and x2_desc and (axes == 1): return dpnp_tensordot_not_implemented(x1_desc, x2_desc) # dpnp_matmul diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index aa2f5159986a..0f1e1b5fc0e5 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -108,7 +108,7 @@ def all(x1, axis=None, out=None, keepdims=False): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axis is not None: pass @@ -148,8 +148,8 @@ def allclose(x1, x2, rtol=1.e-5, atol=1.e-8, **kwargs): rtol_is_scalar = dpnp.isscalar(rtol) atol_is_scalar = dpnp.isscalar(atol) - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not rtol_is_scalar or not atol_is_scalar: @@ -202,7 +202,7 @@ def any(x1, axis=None, out=None, keepdims=False): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axis is not None: pass 
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 6a06db4cb5da..45938f0d52ad 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -85,7 +85,7 @@ def asfarray(x1, dtype=numpy.float64): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: # behavior of original function: int types replaced with float64 if numpy.issubdtype(dtype, numpy.integer): @@ -131,7 +131,7 @@ def atleast_2d(*arys): all_is_array = True arys_desc = [] for ary in arys: - ary_desc = dpnp.get_dpnp_descriptor(ary) + ary_desc = dpnp.get_dpnp_descriptor(ary, copy_when_nondefault_queue=False) if ary_desc: arys_desc.append(ary_desc) else: @@ -166,7 +166,7 @@ def atleast_3d(*arys): all_is_array = True arys_desc = [] for ary in arys: - ary_desc = dpnp.get_dpnp_descriptor(ary) + ary_desc = dpnp.get_dpnp_descriptor(ary, copy_when_nondefault_queue=False) if ary_desc: arys_desc.append(ary_desc) else: @@ -232,8 +232,8 @@ def copyto(dst, src, casting='same_kind', where=True): """ - dst_desc = dpnp.get_dpnp_descriptor(dst, copy_when_strides=False) - src_desc = dpnp.get_dpnp_descriptor(src) + dst_desc = dpnp.get_dpnp_descriptor(dst, copy_when_strides=False, copy_when_nondefault_queue=False) + src_desc = dpnp.get_dpnp_descriptor(src, copy_when_nondefault_queue=False) if dst_desc and src_desc: if casting != 'same_kind': pass @@ -315,7 +315,7 @@ def expand_dims(x1, axis): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: return dpnp_expand_dims(x1_desc, axis).get_pyobj() @@ -370,7 +370,7 @@ def moveaxis(x1, source, destination): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: source_norm = normalize_axis(source, x1_desc.ndim) destination_norm = normalize_axis(destination, x1_desc.ndim) @@ -418,7 +418,7 @@ def 
ravel(x1, order='C'): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: return dpnp_flatten(x1_desc).get_pyobj() @@ -451,7 +451,7 @@ def repeat(x1, repeats, axis=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axis is not None and axis != 0: pass @@ -478,7 +478,7 @@ def reshape(x1, newshape, order='C'): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if order != 'C': pass @@ -521,7 +521,7 @@ def rollaxis(x1, axis, start=0): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not isinstance(axis, int): pass @@ -570,7 +570,7 @@ def squeeze(x1, axis=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: return dpnp_squeeze(x1_desc, axis).get_pyobj() @@ -614,7 +614,7 @@ def swapaxes(x1, axis1, axis2): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axis1 >= x1_desc.ndim: pass @@ -665,7 +665,7 @@ def transpose(x1, axes=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axes is not None: if not any(axes): diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py index a5d3cfa671ae..cef5d686035b 100644 --- a/dpnp/dpnp_iface_searching.py +++ b/dpnp/dpnp_iface_searching.py @@ -95,7 +95,7 @@ def argmax(x1, axis=None, out=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axis is not None: pass @@ -150,7 +150,7 @@ def argmin(x1, axis=None, out=None): """ - x1_desc = 
dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axis is not None: pass diff --git a/dpnp/dpnp_iface_sorting.py b/dpnp/dpnp_iface_sorting.py index 2e01335e9b0c..cdce86cbacc4 100644 --- a/dpnp/dpnp_iface_sorting.py +++ b/dpnp/dpnp_iface_sorting.py @@ -89,7 +89,7 @@ def argsort(in_array1, axis=-1, kind=None, order=None): """ - x1_desc = dpnp.get_dpnp_descriptor(in_array1) + x1_desc = dpnp.get_dpnp_descriptor(in_array1, copy_when_nondefault_queue=False) if x1_desc: if axis != -1: pass @@ -115,7 +115,7 @@ def partition(x1, kth, axis=-1, kind='introselect', order=None): Parameters ``axis``, ``kind`` and ``order`` are supported only with default values. """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not isinstance(kth, int): pass @@ -148,8 +148,8 @@ def searchsorted(x1, x2, side='left', sorter=None): Parameters ``sorter`` is supported only with default values. 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if 0 and x1_desc and x2_desc: if x1_desc.ndim != 1: pass @@ -198,7 +198,7 @@ def sort(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: if x1_desc.ndim != 1: pass diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 960ee6bb51da..27eaf4a115f5 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -154,7 +154,7 @@ def average(x1, axis=None, weights=None, returned=False): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axis is not None: pass @@ -221,8 +221,8 @@ def correlate(x1, x2, mode='valid'): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) - x2_desc = dpnp.get_dpnp_descriptor(x2) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) if x1_desc and x2_desc: if x1_desc.size != x2_desc.size or x1_desc.size == 0: pass @@ -274,7 +274,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.ndim > 2: pass @@ -292,7 +292,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= pass else: if x1_desc.dtype != dpnp.float64: - x1_desc = dpnp.get_dpnp_descriptor(dpnp.astype(x1, dpnp.float64)) + x1_desc = dpnp.get_dpnp_descriptor(dpnp.astype(x1, dpnp.float64), copy_when_nondefault_queue=False) return dpnp_cov(x1_desc).get_pyobj() @@ -350,7 +350,7 @@ def max(x1, axis=None, out=None, keepdims=False, 
initial=None, where=True): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: # Negative values in 'shape' are not allowed in input array # 306-322 check on negative and duplicate axis @@ -427,7 +427,7 @@ def mean(x1, axis=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and not kwargs: if x1_desc.size == 0: pass @@ -473,7 +473,7 @@ def median(x1, axis=None, out=None, overwrite_input=False, keepdims=False): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if axis is not None: pass @@ -516,7 +516,7 @@ def min(x1, axis=None, out=None, keepdims=False, initial=None, where=True): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if out is not None: pass @@ -551,7 +551,7 @@ def nanvar(x1, axis=None, dtype=None, out=None, ddof=0, keepdims=False): Otherwise the function will be executed sequentially on CPU. 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if x1.size == 0: pass @@ -609,7 +609,7 @@ def std(x1, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.size == 0: pass @@ -667,7 +667,7 @@ def var(x1, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.size == 0: pass diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py index 841bcde54325..c50ec260adae 100644 --- a/dpnp/dpnp_iface_trigonometric.py +++ b/dpnp/dpnp_iface_trigonometric.py @@ -108,7 +108,7 @@ def arccos(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_arccos(x1_desc).get_pyobj() @@ -145,7 +145,7 @@ def arccosh(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_arccosh(x1_desc).get_pyobj() @@ -184,9 +184,9 @@ def arcsin(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_arcsin(x1_desc, out_desc).get_pyobj() return call_origin(numpy.arcsin, x1, out=out, **kwargs) @@ -214,7 +214,7 @@ def arcsinh(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + 
x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_arcsinh(x1_desc).get_pyobj() @@ -249,9 +249,9 @@ def arctan(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_arctan(x1_desc, out_desc).get_pyobj() return call_origin(numpy.arctan, x1, out=out, **kwargs) @@ -278,7 +278,7 @@ def arctanh(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_arctanh(x1_desc).get_pyobj() @@ -306,7 +306,7 @@ def cbrt(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_cbrt(x1_desc).get_pyobj() @@ -346,8 +346,8 @@ def arctan2(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -365,7 +365,7 @@ def arctan2(x1, x2, dtype=None, out=None, where=True, **kwargs): elif not where: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, 
copy_when_nondefault_queue=False) if out is not None else None return dpnp_arctan2(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() return call_origin(numpy.arctan2, x1, x2, dtype=dtype, out=out, where=where, **kwargs) @@ -393,9 +393,9 @@ def cos(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_cos(x1_desc, out_desc).get_pyobj() return call_origin(numpy.cos, x1, out=out, **kwargs) @@ -422,7 +422,7 @@ def cosh(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_cosh(x1_desc).get_pyobj() @@ -473,7 +473,7 @@ def degrees(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_degrees(x1_desc).get_pyobj() @@ -506,9 +506,9 @@ def exp(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_exp(x1_desc, out_desc).get_pyobj() return call_origin(numpy.exp, x1, out=out, **kwargs) @@ -540,7 +540,7 @@ def exp2(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return 
dpnp_exp2(x1_desc).get_pyobj() @@ -570,7 +570,7 @@ def expm1(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_expm1(x1_desc).get_pyobj() @@ -604,8 +604,8 @@ def hypot(x1, x2, dtype=None, out=None, where=True, **kwargs): x1_is_scalar = dpnp.isscalar(x1) x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and x2_desc and not kwargs: if not x1_desc and not x1_is_scalar: @@ -625,7 +625,7 @@ def hypot(x1, x2, dtype=None, out=None, where=True, **kwargs): elif not where: pass else: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_hypot(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() return call_origin(numpy.hypot, x1, x2, dtype=dtype, out=out, where=where, **kwargs) @@ -661,9 +661,9 @@ def log(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_log(x1_desc, out_desc).get_pyobj() return call_origin(numpy.log, x1, out=out, **kwargs) @@ -690,7 +690,7 @@ def log10(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, 
copy_when_nondefault_queue=False) if x1_desc: return dpnp_log10(x1_desc).get_pyobj() @@ -722,7 +722,7 @@ def log1p(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_log1p(x1_desc).get_pyobj() @@ -758,7 +758,7 @@ def log2(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_log2(x1_desc).get_pyobj() @@ -788,7 +788,7 @@ def reciprocal(x1, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc and not kwargs: return dpnp_recip(x1_desc).get_pyobj() @@ -838,7 +838,7 @@ def radians(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_radians(x1_desc).get_pyobj() @@ -876,9 +876,9 @@ def sin(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_sin(x1_desc, out_desc).get_pyobj() return call_origin(numpy.sin, x1, out=out, **kwargs) @@ -906,7 +906,7 @@ def sinh(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_sinh(x1_desc).get_pyobj() @@ -935,7 +935,7 @@ def sqrt(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, 
copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_sqrt(x1_desc).get_pyobj() @@ -970,7 +970,7 @@ def square(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_square(x1_desc).get_pyobj() @@ -1000,9 +1000,9 @@ def tan(x1, out=None, **kwargs): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: - out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None return dpnp_tan(x1_desc, out_desc).get_pyobj() return call_origin(numpy.tan, x1, out=out, **kwargs) @@ -1030,7 +1030,7 @@ def tanh(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_tanh(x1_desc).get_pyobj() @@ -1066,7 +1066,7 @@ def unwrap(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: return dpnp_unwrap(x1_desc).get_pyobj() From 8431a6473ae9a1b488904054cca526e124f5e942 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Thu, 24 Feb 2022 04:39:28 -0600 Subject: [PATCH 05/18] Fix conjugate --- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 2 +- tests/skipped_tests.tbl | 3 --- tests/skipped_tests_gpu.tbl | 3 --- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index e9864d3da8a9..98aff7eac758 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ 
-724,7 +724,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_copy_c_ext}; fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_copy_c_ext}; fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_C128][eft_C128] = { - eft_C128, (void*)dpnp_copy_c_ext>}; + eft_C128, (void*)dpnp_conjugate_c_ext>}; fmap[DPNPFuncName::DPNP_FN_COPY][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_copy_c_default}; fmap[DPNPFuncName::DPNP_FN_COPY][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_copy_c_default}; diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 5692e09e014b..f9863951f5b2 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -1,8 +1,5 @@ -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] -tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] -tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25] tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22] diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 833fc84c6b25..15c48a564006 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,8 +1,5 @@ -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] -tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] -tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] 
tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25] tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22] From 6783c04ced53776206464f1c861dc9d853228625 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Wed, 2 Mar 2022 18:19:52 +0300 Subject: [PATCH 06/18] Skip tests which test function abs --- tests/skipped_tests_gpu.tbl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 15c48a564006..859b8dc80e72 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,3 +1,7 @@ +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0] +tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-abs-data0] + tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] From 9a496d8796894e015ab0b3e5e32ecb9fafe916eb Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Wed, 2 Mar 2022 22:09:51 +0300 Subject: [PATCH 07/18] Skip tests which test function ceil --- tests/skipped_tests_gpu.tbl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 859b8dc80e72..4959eb24c5ac 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,6 +1,9 @@ tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-abs-data0] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1] +tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-ceil-data1] 
tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] From 0ac3872e94c827ff86918630004d6070d03b893d Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Wed, 2 Mar 2022 22:59:18 +0300 Subject: [PATCH 08/18] Skip tests which test function conjugate --- tests/skipped_tests_gpu.tbl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 4959eb24c5ac..eaa571f8d02f 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -4,6 +4,9 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-abs-data0] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-ceil-data1] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] From 181490e968d00e07adc1817d9c41eef30b856d42 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Thu, 3 Mar 2022 11:18:32 +0300 Subject: [PATCH 09/18] Skip level_zero tests --- tests/skipped_tests_gpu.tbl | 56 +++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index eaa571f8d02f..0d207e7a01b3 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,12 +1,56 @@ tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0] -tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-abs-data0] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1] 
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1] -tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-ceil-data1] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] -tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumprod-data4] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumsum-data5] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-diff-data6] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ediff1d-data7] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-fabs-data8] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-floor-data9] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-gradient-data10] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumprod-data11] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumsum-data12] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nanprod-data13] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nansum-data14] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-negative-data15] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-prod-data16] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-sign-data17] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-sum-data18] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-trapz-data19] +tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-trunc-data20] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-add-data10-data20] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-copysign-data11-data21] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-cross-data12-data22] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-divide-data13-data23] 
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-floor_divide-data14-data24] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-fmod-data15-data25] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-maximum-data16-data26] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-minimum-data17-data27] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-multiply-data18-data28] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-power-data19-data29] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-remainder-data110-data210] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-subtract-data111-data211] +tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-matmul-data112-data212] +tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-add-data10-data20] +tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-divide-data11-data21] +tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-floor_divide-data12-data22] +tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-fmod-data13-data23] +tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-multiply-data14-data24] +tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-remainder-data15-data25] +tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-subtract-data16-data26] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-add-data10-data20] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-copysign-data11-data21] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-divide-data12-data22] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-floor_divide-data13-data23] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-fmod-data14-data24] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-maximum-data15-data25] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-minimum-data16-data26] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-multiply-data17-data27] 
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-power-data18-data28] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-remainder-data19-data29] +tests/test_sycl_queue.py::test_out[level_zero:gpu:0-subtract-data110-data210] +tests/test_sycl_queue.py::test_modf[level_zero:gpu:0] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] From dd92cdb816d61f8da0d0b2278d3f5af6ec2859ad Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Fri, 4 Mar 2022 10:34:10 +0300 Subject: [PATCH 10/18] Skip all tests from test_sycl_queue.py --- tests/skipped_tests_gpu.tbl | 54 +++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 0d207e7a01b3..a9c4cdd335e0 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,3 +1,57 @@ +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14] 
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-divide-data13-data23] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-floor_divide-data14-data24] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-fmod-data15-data25] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-maximum-data16-data26] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-minimum-data17-data27] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-multiply-data18-data28] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data19-data29] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-remainder-data110-data210] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-subtract-data111-data211] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-matmul-data112-data212] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-fmod-data13-data23] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-multiply-data14-data24] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-subtract-data16-data26] 
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-add-data10-data20] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-copysign-data11-data21] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-divide-data12-data22] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-floor_divide-data13-data23] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-fmod-data14-data24] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-maximum-data15-data25] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-minimum-data16-data26] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-multiply-data17-data27] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-power-data18-data28] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-remainder-data19-data29] +tests/test_sycl_queue.py::test_out[opencl:gpu:0-subtract-data110-data210] +tests/test_sycl_queue.py::test_modf[opencl:gpu:0] + tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] From eb8dd52b0d2a3df409680975f3d75b913fae966a Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Fri, 4 Mar 2022 13:50:34 +0300 Subject: [PATCH 11/18] Unskip test for modf --- tests/skipped_tests_gpu.tbl | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index a9c4cdd335e0..fbc0bf813080 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -50,7 +50,6 @@ tests/test_sycl_queue.py::test_out[opencl:gpu:0-multiply-data17-data27] tests/test_sycl_queue.py::test_out[opencl:gpu:0-power-data18-data28] tests/test_sycl_queue.py::test_out[opencl:gpu:0-remainder-data19-data29] tests/test_sycl_queue.py::test_out[opencl:gpu:0-subtract-data110-data210] -tests/test_sycl_queue.py::test_modf[opencl:gpu:0] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1] From 
3f99c620a2507de859ab61336dbf1e95a132da55 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Fri, 4 Mar 2022 15:13:34 +0300 Subject: [PATCH 12/18] Unskip tests/test_sycl_queue.py::test_out --- tests/skipped_tests_gpu.tbl | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index fbc0bf813080..a98cc18a9c90 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -39,17 +39,6 @@ tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-fmod-data13-data23] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-multiply-data14-data24] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-subtract-data16-data26] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-add-data10-data20] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-copysign-data11-data21] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-divide-data12-data22] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-floor_divide-data13-data23] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-fmod-data14-data24] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-maximum-data15-data25] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-minimum-data16-data26] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-multiply-data17-data27] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-power-data18-data28] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-remainder-data19-data29] -tests/test_sycl_queue.py::test_out[opencl:gpu:0-subtract-data110-data210] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1] From f4f466a0855d552d2ee29e00326298db1083bcb7 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Fri, 4 Mar 2022 16:01:18 +0300 Subject: [PATCH 13/18] Unskip tests/test_sycl_queue.py::test_broadcasting --- tests/skipped_tests_gpu.tbl | 7 ------- 1 file changed, 7 deletions(-) 
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index a98cc18a9c90..618b72a61498 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -32,13 +32,6 @@ tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data19-data29] tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-remainder-data110-data210] tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-subtract-data111-data211] tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-matmul-data112-data212] -tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20] -tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21] -tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] -tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-fmod-data13-data23] -tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-multiply-data14-data24] -tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25] -tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-subtract-data16-data26] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1] From 47edea45f4578bc3e2437f525b73e7c01835a6b5 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Fri, 4 Mar 2022 17:37:36 +0300 Subject: [PATCH 14/18] Unskip test_1in_1out and test_2in_1out --- tests/skipped_tests_gpu.tbl | 41 +++++++------------------------------ 1 file changed, 7 insertions(+), 34 deletions(-) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 618b72a61498..84267f8a30d7 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,37 +1,10 @@ -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3] 
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-divide-data13-data23] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-floor_divide-data14-data24] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-fmod-data15-data25] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-maximum-data16-data26] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-minimum-data17-data27] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-multiply-data18-data28] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data19-data29] 
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-remainder-data110-data210] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-subtract-data111-data211] -tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-matmul-data112-data212] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-fmod-data13-data23] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-multiply-data14-data24] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25] +tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-subtract-data16-data26] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1] From 8e632084d5ede75196cc19024f0bd3ef0f5d661e Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Fri, 4 Mar 2022 20:20:50 +0300 Subject: [PATCH 15/18] Unskip test_2in_1out --- tests/skipped_tests_gpu.tbl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 84267f8a30d7..ae77cd92f512 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,3 +1,24 @@ +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7] 
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] From 39f8b8728ce0e0670f258e26ce385ce035772539 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Fri, 4 Mar 2022 21:48:53 +0300 Subject: [PATCH 16/18] Unskip test_1in_1out --- tests/skipped_tests_gpu.tbl | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index ae77cd92f512..0f461ed6c833 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,24 +1,16 @@ -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4] 
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-divide-data13-data23] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-floor_divide-data14-data24] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-fmod-data15-data25] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-maximum-data16-data26] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-minimum-data17-data27] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-multiply-data18-data28] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data19-data29] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-remainder-data110-data210] 
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-subtract-data111-data211] +tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-matmul-data112-data212] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] From 78d6e28588ef9dbf81d19377875de1d006e22a64 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Sat, 5 Mar 2022 10:22:07 +0300 Subject: [PATCH 17/18] Skip test_1in_1out --- tests/skipped_tests_gpu.tbl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 0f461ed6c833..a98cc18a9c90 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -1,3 +1,24 @@ +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15] 
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] +tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20] tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20] tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21] tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22] From 174c0f1fb7d39e15ca9a4e8b917404462ec75283 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Sat, 5 Mar 2022 12:13:23 +0300 Subject: [PATCH 18/18] Unskip half of test_1in_1out --- tests/skipped_tests_gpu.tbl | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index a98cc18a9c90..cf173445b6d2 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -8,17 +8,7 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] 
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20] + tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20] tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21] tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22]