From 7bd11b2b80644c0e627d3573e823919028afa213 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Fri, 4 Feb 2022 03:09:25 -0600
Subject: [PATCH 01/18] Extend main cython funcs with queues and events

---
 dpnp/backend/include/dpnp_iface_fptr.hpp      |   1 -
 .../kernels/dpnp_krnl_arraycreation.cpp       |   4 +-
 dpnp/backend/kernels/dpnp_krnl_bitwise.cpp    |  13 +-
 dpnp/backend/kernels/dpnp_krnl_common.cpp     |  16 +-
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp   |  19 +-
 .../kernels/dpnp_krnl_mathematical.cpp        |   3 +-
 dpnp/dpnp_algo/dpnp_algo.pxd                  | 219 ++++++++++++++--
 dpnp/dpnp_algo/dpnp_algo.pyx                  | 229 +++++++++++-----
 dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx    | 245 +++++++++++++++---
 dpnp/dpnp_algo/dpnp_algo_bitwise.pyx          |  12 +-
 dpnp/dpnp_algo/dpnp_algo_counting.pyx         |   2 +-
 dpnp/dpnp_algo/dpnp_algo_manipulation.pyx     |  34 ++-
 dpnp/dpnp_algo/dpnp_algo_mathematical.pyx     |  90 ++++---
 dpnp/dpnp_algo/dpnp_algo_sorting.pyx          |   4 +-
 dpnp/dpnp_algo/dpnp_algo_special.pyx          |   2 +-
 dpnp/dpnp_algo/dpnp_algo_statistics.pyx       |  31 ++-
 dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx    |  50 ++--
 17 files changed, 736 insertions(+), 238 deletions(-)
diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
index e79e5d2a1c24..8e33fa4db9bb 100644
--- a/dpnp/backend/include/dpnp_iface_fptr.hpp
+++ b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -379,7 +379,6 @@ enum class DPNPFuncName : size_t
     DPNP_FN_ZEROS_LIKE,                   /**< Used in numpy.zeros_like() impl */
     DPNP_FN_ZEROS_LIKE_EXT,               /**< Used in numpy.zeros_like() impl, requires extra parameters */
     DPNP_FN_LAST,                         /**< The latest element of the enumeration */
-    DPNP_FN_LAST_EXT                      /**< The latest element of the enumeration, requires extra parameters */
 };
 
 /**
diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
index d359f7a34de3..9f1b742c84e1 100644
--- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
@@ -73,9 +73,9 @@ DPCTLSyclEventRef dpnp_arange_c(DPCTLSyclQueueRef q_ref,
 
     event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType>
diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
index d96bc79aa867..4d0f6498ed0f 100644
--- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
@@ -68,9 +68,9 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
 
     event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType>
@@ -101,6 +101,9 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_INVERT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_INVERT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_default_c<int64_t>};
 
+    fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_ext_c<int32_t>};
+    fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_ext_c<int64_t>};
+
     return;
 }
 
@@ -209,7 +212,6 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
                 cgh.parallel_for<class __name__##_strides_kernel<_DataType>>(gws, kernel_parallel_for_func);           \
             };                                                                                                         \
             event = q.submit(kernel_func);                                                                             \
-            event.wait();                                                                                              \
         }                                                                                                              \
         else                                                                                                           \
         {                                                                                                              \
@@ -223,9 +225,10 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
                 cgh.parallel_for<class __name__##_kernel<_DataType>>(gws, kernel_parallel_for_func);                   \
             };                                                                                                         \
             event = q.submit(kernel_func);                                                                             \
-            event.wait();                                                                                              \
         }                                                                                                              \
-        return event_ref;                                                                             \
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
+                                                                                                                       \
+        return DPCTLEvent_Copy(event_ref);                                                                             \
     }                                                                                                                  \
                                                                                                                        \
     template <typename _DataType>                                                                                      \
diff --git a/dpnp/backend/kernels/dpnp_krnl_common.cpp b/dpnp/backend/kernels/dpnp_krnl_common.cpp
index 95bbe90b8826..4c671f45604d 100644
--- a/dpnp/backend/kernels/dpnp_krnl_common.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_common.cpp
@@ -81,9 +81,9 @@ DPCTLSyclEventRef dpnp_astype_c(DPCTLSyclQueueRef q_ref,
 
     event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType, typename _ResultType>
@@ -732,9 +732,9 @@ DPCTLSyclEventRef dpnp_initval_c(DPCTLSyclQueueRef q_ref,
 
     sycl::event event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType>
@@ -756,10 +756,10 @@ void (*dpnp_initval_default_c)(void*, void*, size_t) = dpnp_initval_c<_DataType>
 
 template <typename _DataType>
 DPCTLSyclEventRef (*dpnp_initval_ext_c)(DPCTLSyclQueueRef,
-                                    void*,
-                                    void*,
-                                    size_t,
-                                    const DPCTLEventVectorRef) = dpnp_initval_c<_DataType>;
+                                        void*,
+                                        void*,
+                                        size_t,
+                                        const DPCTLEventVectorRef) = dpnp_initval_c<_DataType>;
 
 template <typename _KernelNameSpecialization>
 class dpnp_matmul_c_kernel;
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index a3bba3124051..e9864d3da8a9 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -116,7 +116,6 @@
                     gws, kernel_parallel_for_func);                                                                    \
             };                                                                                                         \
             event = q.submit(kernel_func);                                                                             \
-            event.wait();                                                                                              \
         }                                                                                                              \
         else                                                                                                           \
         {                                                                                                              \
@@ -142,9 +141,11 @@
             {                                                                                                          \
                 event = q.submit(kernel_func);                                                                         \
             }                                                                                                          \
-            event.wait();                                                                                              \
         }                                                                                                              \
-        return event_ref;                                                                                              \
+                                                                                                                       \
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
+                                                                                                                       \
+        return DPCTLEvent_Copy(event_ref);                                                                             \
     }                                                                                                                  \
                                                                                                                        \
     template <typename _DataType_input, typename _DataType_output>                                                     \
@@ -643,9 +644,9 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
             }                                                                                                          \
         }                                                                                                              \
                                                                                                                        \
-        event.wait();                                                                                                  \
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
                                                                                                                        \
-        return event_ref;                                                                                              \
+        return DPCTLEvent_Copy(event_ref);                                                                             \
     }                                                                                                                  \
                                                                                                                        \
     template <typename _DataType>                                                                                      \
@@ -941,6 +942,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                                                                                                                        \
             input1_it->~DPNPC_id();                                                                                    \
             input2_it->~DPNPC_id();                                                                                    \
+                                                                                                                       \
+            return event_ref;                                                                                          \
         }                                                                                                              \
         else if (use_strides)                                                                                          \
         {                                                                                                              \
@@ -969,7 +972,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
             };                                                                                                         \
                                                                                                                        \
             event = q.submit(kernel_func);                                                                             \
-            event.wait();                                                                                              \
         }                                                                                                              \
         else                                                                                                           \
         {                                                                                                              \
@@ -995,9 +997,10 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                 };                                                                                                     \
                 event = q.submit(kernel_func);                                                                         \
             }                                                                                                          \
-            event.wait();                                                                                              \
         }                                                                                                              \
-        return event_ref;                                                                                              \
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
+                                                                                                                       \
+        return DPCTLEvent_Copy(event_ref);                                                                             \
     }                                                                                                                  \
                                                                                                                        \
     template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>                         \
diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
index 06abddf88532..bf69fce8e416 100644
--- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
@@ -482,7 +482,8 @@ DPCTLSyclEventRef dpnp_ediff1d_c(DPCTLSyclQueueRef q_ref,
             gws, kernel_parallel_for_func);
     };
     event = q.submit(kernel_func);
-    event.wait();
+
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
     return DPCTLEvent_Copy(event_ref);
 }
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index 1cc650bdaaa8..a4f4156a8ed6 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -25,6 +25,8 @@
 # THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
+cimport dpctl as c_dpctl
+
 from libcpp cimport bool as cpp_bool
 
 from dpnp.dpnp_utils.dpnp_algo_utils cimport dpnp_descriptor
@@ -34,166 +36,325 @@ from dpnp.dpnp_algo cimport shape_elem_type, shape_type_c
 cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this namespace for Enum import
     cdef enum DPNPFuncName "DPNPFuncName":
         DPNP_FN_ABSOLUTE
+        DPNP_FN_ABSOLUTE_EXT
         DPNP_FN_ADD
+        DPNP_FN_ADD_EXT
         DPNP_FN_ALL
+        DPNP_FN_ALL_EXT
         DPNP_FN_ALLCLOSE
+        DPNP_FN_ALLCLOSE_EXT
         DPNP_FN_ANY
+        DPNP_FN_ANY_EXT
         DPNP_FN_ARANGE
+        DPNP_FN_ARANGE_EXT
         DPNP_FN_ARCCOS
+        DPNP_FN_ARCCOS_EXT
         DPNP_FN_ARCCOSH
+        DPNP_FN_ARCCOSH_EXT
         DPNP_FN_ARCSIN
+        DPNP_FN_ARCSIN_EXT
         DPNP_FN_ARCSINH
+        DPNP_FN_ARCSINH_EXT
         DPNP_FN_ARCTAN
+        DPNP_FN_ARCTAN_EXT
         DPNP_FN_ARCTAN2
+        DPNP_FN_ARCTAN2_EXT
         DPNP_FN_ARCTANH
+        DPNP_FN_ARCTANH_EXT
         DPNP_FN_ARGMAX
+        DPNP_FN_ARGMAX_EXT
         DPNP_FN_ARGMIN
+        DPNP_FN_ARGMIN_EXT
         DPNP_FN_ARGSORT
+        DPNP_FN_ARGSORT_EXT
         DPNP_FN_AROUND
+        DPNP_FN_AROUND_EXT
         DPNP_FN_ASTYPE
+        DPNP_FN_ASTYPE_EXT
         DPNP_FN_BITWISE_AND
+        DPNP_FN_BITWISE_AND_EXT
         DPNP_FN_BITWISE_OR
+        DPNP_FN_BITWISE_OR_EXT
         DPNP_FN_BITWISE_XOR
+        DPNP_FN_BITWISE_XOR_EXT
         DPNP_FN_CBRT
+        DPNP_FN_CBRT_EXT
         DPNP_FN_CEIL
+        DPNP_FN_CEIL_EXT
         DPNP_FN_CHOLESKY
+        DPNP_FN_CHOLESKY_EXT
         DPNP_FN_CHOOSE
+        DPNP_FN_CHOOSE_EXT
         DPNP_FN_CONJIGUATE
+        DPNP_FN_CONJIGUATE_EXT
         DPNP_FN_COPY
+        DPNP_FN_COPY_EXT
         DPNP_FN_COPYSIGN
+        DPNP_FN_COPYSIGN_EXT
         DPNP_FN_COPYTO
+        DPNP_FN_COPYTO_EXT
         DPNP_FN_CORRELATE
+        DPNP_FN_CORRELATE_EXT
         DPNP_FN_COS
+        DPNP_FN_COS_EXT
         DPNP_FN_COSH
+        DPNP_FN_COSH_EXT
         DPNP_FN_COV
+        DPNP_FN_COV_EXT
         DPNP_FN_COUNT_NONZERO
+        DPNP_FN_COUNT_NONZERO_EXT
         DPNP_FN_CROSS
+        DPNP_FN_CROSS_EXT
         DPNP_FN_CUMPROD
+        DPNP_FN_CUMPROD_EXT
         DPNP_FN_CUMSUM
+        DPNP_FN_CUMSUM_EXT
         DPNP_FN_DEGREES
+        DPNP_FN_DEGREES_EXT
         DPNP_FN_DET
+        DPNP_FN_DET_EXT
         DPNP_FN_DIAG
+        DPNP_FN_DIAG_EXT
         DPNP_FN_DIAG_INDICES
+        DPNP_FN_DIAG_INDICES_EXT
         DPNP_FN_DIAGONAL
+        DPNP_FN_DIAGONAL_EXT
         DPNP_FN_DIVIDE
+        DPNP_FN_DIVIDE_EXT
         DPNP_FN_DOT
+        DPNP_FN_DOT_EXT
         DPNP_FN_EDIFF1D
+        DPNP_FN_EDIFF1D_EXT
         DPNP_FN_EIG
+        DPNP_FN_EIG_EXT
         DPNP_FN_EIGVALS
+        DPNP_FN_EIGVALS_EXT
         DPNP_FN_ERF
+        DPNP_FN_ERF_EXT
         DPNP_FN_EYE
+        DPNP_FN_EYE_EXT
         DPNP_FN_EXP
+        DPNP_FN_EXP_EXT
         DPNP_FN_EXP2
+        DPNP_FN_EXP2_EXT
         DPNP_FN_EXPM1
+        DPNP_FN_EXPM1_EXT
         DPNP_FN_FABS
+        DPNP_FN_FABS_EXT
         DPNP_FN_FFT_FFT
+        DPNP_FN_FFT_FFT_EXT
         DPNP_FN_FILL_DIAGONAL
+        DPNP_FN_FILL_DIAGONAL_EXT
         DPNP_FN_FLATTEN
+        DPNP_FN_FLATTEN_EXT
         DPNP_FN_FLOOR
+        DPNP_FN_FLOOR_EXT
         DPNP_FN_FLOOR_DIVIDE
+        DPNP_FN_FLOOR_DIVIDE_EXT
         DPNP_FN_FMOD
+        DPNP_FN_FMOD_EXT
         DPNP_FN_FULL
+        DPNP_FN_FULL_EXT
         DPNP_FN_FULL_LIKE
+        DPNP_FN_FULL_LIKE_EXT
         DPNP_FN_HYPOT
+        DPNP_FN_HYPOT_EXT
         DPNP_FN_IDENTITY
+        DPNP_FN_IDENTITY_EXT
         DPNP_FN_INITVAL
+        DPNP_FN_INITVAL_EXT
         DPNP_FN_INV
+        DPNP_FN_INV_EXT
         DPNP_FN_INVERT
+        DPNP_FN_INVERT_EXT
         DPNP_FN_KRON
+        DPNP_FN_KRON_EXT
         DPNP_FN_LEFT_SHIFT
+        DPNP_FN_LEFT_SHIFT_EXT
         DPNP_FN_LOG
+        DPNP_FN_LOG_EXT
         DPNP_FN_LOG10
+        DPNP_FN_LOG10_EXT
         DPNP_FN_LOG1P
+        DPNP_FN_LOG1P_EXT
         DPNP_FN_LOG2
+        DPNP_FN_LOG2_EXT
         DPNP_FN_MATMUL
         DPNP_FN_MATMUL_EXT
         DPNP_FN_MATRIX_RANK
+        DPNP_FN_MATRIX_RANK_EXT
         DPNP_FN_MAX
+        DPNP_FN_MAX_EXT
         DPNP_FN_MAXIMUM
+        DPNP_FN_MAXIMUM_EXT
         DPNP_FN_MEAN
+        DPNP_FN_MEAN_EXT
         DPNP_FN_MEDIAN
+        DPNP_FN_MEDIAN_EXT
         DPNP_FN_MIN
+        DPNP_FN_MIN_EXT
         DPNP_FN_MINIMUM
+        DPNP_FN_MINIMUM_EXT
         DPNP_FN_MODF
+        DPNP_FN_MODF_EXT
         DPNP_FN_MULTIPLY
+        DPNP_FN_MULTIPLY_EXT
         DPNP_FN_NANVAR
+        DPNP_FN_NANVAR_EXT
         DPNP_FN_NEGATIVE
+        DPNP_FN_NEGATIVE_EXT
         DPNP_FN_NONZERO
+        DPNP_FN_NONZERO_EXT
         DPNP_FN_ONES
+        DPNP_FN_ONES_EXT
         DPNP_FN_ONES_LIKE
+        DPNP_FN_ONES_LIKE_EXT
         DPNP_FN_PARTITION
+        DPNP_FN_PARTITION_EXT
         DPNP_FN_PLACE
+        DPNP_FN_PLACE_EXT
         DPNP_FN_POWER
+        DPNP_FN_POWER_EXT
         DPNP_FN_PROD
+        DPNP_FN_PROD_EXT
         DPNP_FN_PTP
+        DPNP_FN_PTP_EXT
         DPNP_FN_PUT
+        DPNP_FN_PUT_EXT
         DPNP_FN_QR
+        DPNP_FN_QR_EXT
         DPNP_FN_RADIANS
+        DPNP_FN_RADIANS_EXT
         DPNP_FN_REMAINDER
+        DPNP_FN_REMAINDER_EXT
         DPNP_FN_RECIP
+        DPNP_FN_RECIP_EXT
         DPNP_FN_REPEAT
+        DPNP_FN_REPEAT_EXT
         DPNP_FN_RIGHT_SHIFT
+        DPNP_FN_RIGHT_SHIFT_EXT
         DPNP_FN_RNG_BETA
+        DPNP_FN_RNG_BETA_EXT
         DPNP_FN_RNG_BINOMIAL
+        DPNP_FN_RNG_BINOMIAL_EXT
         DPNP_FN_RNG_CHISQUARE
+        DPNP_FN_RNG_CHISQUARE_EXT
         DPNP_FN_RNG_EXPONENTIAL
+        DPNP_FN_RNG_EXPONENTIAL_EXT
         DPNP_FN_RNG_F
+        DPNP_FN_RNG_F_EXT
         DPNP_FN_RNG_GAMMA
+        DPNP_FN_RNG_GAMMA_EXT
         DPNP_FN_RNG_GAUSSIAN
+        DPNP_FN_RNG_GAUSSIAN_EXT
         DPNP_FN_RNG_GEOMETRIC
+        DPNP_FN_RNG_GEOMETRIC_EXT
         DPNP_FN_RNG_GUMBEL
+        DPNP_FN_RNG_GUMBEL_EXT
         DPNP_FN_RNG_HYPERGEOMETRIC
+        DPNP_FN_RNG_HYPERGEOMETRIC_EXT
         DPNP_FN_RNG_LAPLACE
+        DPNP_FN_RNG_LAPLACE_EXT
         DPNP_FN_RNG_LOGISTIC
+        DPNP_FN_RNG_LOGISTIC_EXT
         DPNP_FN_RNG_LOGNORMAL
+        DPNP_FN_RNG_LOGNORMAL_EXT
         DPNP_FN_RNG_MULTINOMIAL
+        DPNP_FN_RNG_MULTINOMIAL_EXT
         DPNP_FN_RNG_MULTIVARIATE_NORMAL
+        DPNP_FN_RNG_MULTIVARIATE_NORMAL_EXT
         DPNP_FN_RNG_NEGATIVE_BINOMIAL
+        DPNP_FN_RNG_NEGATIVE_BINOMIAL_EXT
         DPNP_FN_RNG_NONCENTRAL_CHISQUARE
+        DPNP_FN_RNG_NONCENTRAL_CHISQUARE_EXT
         DPNP_FN_RNG_NORMAL
+        DPNP_FN_RNG_NORMAL_EXT
         DPNP_FN_RNG_PARETO
+        DPNP_FN_RNG_PARETO_EXT
         DPNP_FN_RNG_POISSON
+        DPNP_FN_RNG_POISSON_EXT
         DPNP_FN_RNG_POWER
+        DPNP_FN_RNG_POWER_EXT
         DPNP_FN_PUT_ALONG_AXIS
+        DPNP_FN_PUT_ALONG_AXIS_EXT
         DPNP_FN_RNG_RAYLEIGH
+        DPNP_FN_RNG_RAYLEIGH_EXT
         DPNP_FN_RNG_SHUFFLE
+        DPNP_FN_RNG_SHUFFLE_EXT
         DPNP_FN_RNG_SRAND
+        DPNP_FN_RNG_SRAND_EXT
         DPNP_FN_RNG_STANDARD_CAUCHY
+        DPNP_FN_RNG_STANDARD_CAUCHY_EXT
         DPNP_FN_RNG_STANDARD_EXPONENTIAL
+        DPNP_FN_RNG_STANDARD_EXPONENTIAL_EXT
         DPNP_FN_RNG_STANDARD_GAMMA
+        DPNP_FN_RNG_STANDARD_GAMMA_EXT
         DPNP_FN_RNG_STANDARD_NORMAL
+        DPNP_FN_RNG_STANDARD_NORMAL_EXT
         DPNP_FN_RNG_STANDARD_T
+        DPNP_FN_RNG_STANDARD_T_EXT
         DPNP_FN_RNG_TRIANGULAR
+        DPNP_FN_RNG_TRIANGULAR_EXT
         DPNP_FN_RNG_UNIFORM
+        DPNP_FN_RNG_UNIFORM_EXT
         DPNP_FN_RNG_VONMISES
+        DPNP_FN_RNG_VONMISES_EXT
         DPNP_FN_RNG_WALD
+        DPNP_FN_RNG_WALD_EXT
         DPNP_FN_RNG_WEIBULL
+        DPNP_FN_RNG_WEIBULL_EXT
         DPNP_FN_RNG_ZIPF
+        DPNP_FN_RNG_ZIPF_EXT
         DPNP_FN_SEARCHSORTED
+        DPNP_FN_SEARCHSORTED_EXT
         DPNP_FN_SIGN
+        DPNP_FN_SIGN_EXT
         DPNP_FN_SIN
+        DPNP_FN_SIN_EXT
         DPNP_FN_SINH
+        DPNP_FN_SINH_EXT
         DPNP_FN_SORT
+        DPNP_FN_SORT_EXT
         DPNP_FN_SQRT
+        DPNP_FN_SQRT_EXT
         DPNP_FN_SQUARE
+        DPNP_FN_SQUARE_EXT
         DPNP_FN_STD
+        DPNP_FN_STD_EXT
         DPNP_FN_SUBTRACT
+        DPNP_FN_SUBTRACT_EXT
         DPNP_FN_SUM
+        DPNP_FN_SUM_EXT
         DPNP_FN_SVD
+        DPNP_FN_SVD_EXT
         DPNP_FN_TAKE
+        DPNP_FN_TAKE_EXT
         DPNP_FN_TAN
+        DPNP_FN_TAN_EXT
         DPNP_FN_TANH
+        DPNP_FN_TANH_EXT
         DPNP_FN_TRACE
+        DPNP_FN_TRACE_EXT
         DPNP_FN_TRANSPOSE
+        DPNP_FN_TRANSPOSE_EXT
         DPNP_FN_TRAPZ
+        DPNP_FN_TRAPZ_EXT
         DPNP_FN_TRI
+        DPNP_FN_TRI_EXT
         DPNP_FN_TRIL
+        DPNP_FN_TRIL_EXT
         DPNP_FN_TRIU
+        DPNP_FN_TRIU_EXT
         DPNP_FN_TRUNC
+        DPNP_FN_TRUNC_EXT
         DPNP_FN_VANDER
+        DPNP_FN_VANDER_EXT
         DPNP_FN_VAR
+        DPNP_FN_VAR_EXT
         DPNP_FN_ZEROS
+        DPNP_FN_ZEROS_EXT
         DPNP_FN_ZEROS_LIKE
+        DPNP_FN_ZEROS_LIKE_EXT
 
 cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncType":  # need this namespace for Enum import
     cdef enum DPNPFuncType "DPNPFuncType":
@@ -234,22 +395,48 @@ cdef extern from "dpnp_iface.hpp":
 
 
 # C function pointer to the C library template functions
-ctypedef void(*fptr_1out_t)(void * , size_t)
-ctypedef void(*fptr_1in_1out_t)(void *, void * , size_t)
-ctypedef void(*fptr_1in_1out_strides_t)(void *, const size_t, const size_t,
-                                        const shape_elem_type * , const shape_elem_type * ,
-                                        void *, const size_t, const size_t,
-                                        const shape_elem_type * , const shape_elem_type * ,
-                                        const long * )
-ctypedef void(*fptr_2in_1out_t)(void * , const void * , const size_t, const shape_elem_type * , const size_t,
-                                const void *, const size_t, const shape_elem_type * , const size_t, const long * )
-ctypedef void(*fptr_2in_1out_strides_t)(void *, const size_t, const size_t,
-                                        const shape_elem_type * , const shape_elem_type * ,
-                                        void *, const size_t, const size_t,
-                                        const shape_elem_type * , const shape_elem_type * ,
-                                        void *, const size_t, const size_t,
-                                        const shape_elem_type * , const shape_elem_type * ,
-                                        const long * )
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_1out_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                 void * , size_t,
+                                                 const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                     void *, void * , size_t,
+                                                     const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_1in_1out_strides_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                             void *, const size_t, const size_t,
+                                                             const shape_elem_type * , const shape_elem_type * ,
+                                                             void *, const size_t, const size_t,
+                                                             const shape_elem_type * , const shape_elem_type * ,
+                                                             const long * ,
+                                                             const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                     void * ,
+                                                     const void * ,
+                                                     const size_t,
+                                                     const shape_elem_type * ,
+                                                     const size_t,
+                                                     const void *,
+                                                     const size_t,
+                                                     const shape_elem_type * ,
+                                                     const size_t,
+                                                     const long * ,
+                                                     const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                             void *,
+                                                             const size_t,
+                                                             const size_t,
+                                                             const shape_elem_type * ,
+                                                             const shape_elem_type * ,
+                                                             void *,
+                                                             const size_t,
+                                                             const size_t,
+                                                             const shape_elem_type * ,
+                                                             const shape_elem_type * ,
+                                                             void *,
+                                                             const size_t, const size_t,
+                                                             const shape_elem_type * ,
+                                                             const shape_elem_type * ,
+                                                             const long * ,
+                                                             const c_dpctl.DPCTLEventVectorRef)
 ctypedef void(*fptr_blas_gemm_2in_1out_t)(void *, void * , void * , size_t, size_t, size_t)
 ctypedef void(*dpnp_reduction_c_t)(void *, const void * , const shape_elem_type*, const size_t, const shape_elem_type*, const size_t, const void * , const long*)
 
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index f9445381c78c..3018776e596d 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -36,10 +36,10 @@ from libc.time cimport time, time_t
 from libcpp.vector cimport vector
 import dpnp
 import dpnp.config as config
+import dpnp.dpnp_container as dpnp_container
 import dpnp.dpnp_utils as utils_py
 from dpnp.dpnp_array import dpnp_array
 
-cimport dpctl as c_dpctl
 import dpctl
 
 cimport cpython
@@ -74,14 +74,22 @@ include "dpnp_algo_statistics.pyx"
 include "dpnp_algo_trigonometric.pyx"
 
 
-ctypedef void(*fptr_dpnp_arange_t)(size_t, size_t, void *, size_t)
-ctypedef void(*fptr_dpnp_astype_t)(const void *, void * , const size_t)
-ctypedef void(*fptr_dpnp_flatten_t)(void *, const size_t, const size_t,
-                                    const shape_elem_type * , const shape_elem_type * ,
-                                    void *, const size_t, const size_t,
-                                    const shape_elem_type * , const shape_elem_type * ,
-                                    const long * )
-ctypedef void(*fptr_dpnp_initval_t)(void *, void * , size_t)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_arange_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                        size_t, size_t, void *, size_t,
+                                                        const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_astype_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                        const void *, void * , const size_t,
+                                                        const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_flatten_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                         void *, const size_t, const size_t,
+                                                         const shape_elem_type * , const shape_elem_type * ,
+                                                         void *, const size_t, const size_t,
+                                                         const shape_elem_type * , const shape_elem_type * ,
+                                                         const long * ,
+                                                         const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_initval_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                         void *, void * , size_t,
+                                                         const c_dpctl.DPCTLEventVectorRef)
 
 
 cpdef utils.dpnp_descriptor dpnp_arange(start, stop, step, dtype):
@@ -92,15 +100,27 @@ cpdef utils.dpnp_descriptor dpnp_arange(start, stop, step, dtype):
     cdef tuple obj_shape = utils._object_to_tuple(obj_len)
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARANGE, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARANGE_EXT, param1_type, param1_type)
 
     cdef utils.dpnp_descriptor result = utils.create_output_descriptor(obj_shape, kernel_data.return_type, None)
 
     # for i in range(result.size):
     #     result[i] = start + i
 
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef fptr_dpnp_arange_t func = <fptr_dpnp_arange_t > kernel_data.ptr
-    func(start, step, result.get_data(), result.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    start,
+                                                    step,
+                                                    result.get_data(),
+                                                    result.size,
+                                                    NULL)  # dep_events_ref
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -109,7 +129,7 @@ cpdef utils.dpnp_descriptor dpnp_astype(utils.dpnp_descriptor x1, dtype):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
     cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ASTYPE, param1_type, param2_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ASTYPE_EXT, param1_type, param2_type)
 
     x1_obj = x1.get_array()
 
@@ -122,8 +142,16 @@ cpdef utils.dpnp_descriptor dpnp_astype(utils.dpnp_descriptor x1, dtype):
                                                                        usm_type=x1_obj.usm_type,
                                                                        sycl_queue=x1_obj.sycl_queue)
 
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef fptr_dpnp_astype_t func = <fptr_dpnp_astype_t > kernel_data.ptr
-    func(x1.get_data(), result.get_data(), x1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -131,7 +159,7 @@ cpdef utils.dpnp_descriptor dpnp_astype(utils.dpnp_descriptor x1, dtype):
 cpdef utils.dpnp_descriptor dpnp_flatten(utils.dpnp_descriptor x1):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FLATTEN, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FLATTEN_EXT, param1_type, param1_type)
 
     cdef shape_type_c x1_shape = x1.shape
     cdef shape_type_c x1_strides = utils.strides_to_vector(x1.strides, x1_shape)
@@ -147,20 +175,30 @@ cpdef utils.dpnp_descriptor dpnp_flatten(utils.dpnp_descriptor x1):
                                                                        usm_type=x1_obj.usm_type,
                                                                        sycl_queue=x1_obj.sycl_queue)
 
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
 
     cdef fptr_dpnp_flatten_t func = <fptr_dpnp_flatten_t > kernel_data.ptr
-    func(result.get_data(),
-         result.size,
-         result.ndim,
-         result_shape.data(),
-         result_strides.data(),
-         x1.get_data(),
-         x1.size,
-         x1.ndim,
-         x1_shape.data(),
-         x1_strides.data(),
-         NULL)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    result.size,
+                                                    result.ndim,
+                                                    result_shape.data(),
+                                                    result_strides.data(),
+                                                    x1.get_data(),
+                                                    x1.size,
+                                                    x1.ndim,
+                                                    x1_shape.data(),
+                                                    x1_strides.data(),
+                                                    NULL,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -171,16 +209,29 @@ cpdef utils.dpnp_descriptor dpnp_init_val(shape, dtype, value):
     """
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_INITVAL, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_INITVAL_EXT, param1_type, param1_type)
 
     cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(shape, dtype, None)
 
+    result_obj = result.get_array()
+
     # TODO: find better way to pass single value with type conversion
-    cdef utils.dpnp_descriptor val_arr = utils_py.create_output_descriptor_py((1, ), dtype, None)
+    cdef utils.dpnp_descriptor val_arr = utils_py.create_output_descriptor_py((1, ),
+                                                                              dtype,
+                                                                              None,
+                                                                              device=result_obj.sycl_device,
+                                                                              usm_type=result_obj.usm_type,
+                                                                              sycl_queue=result_obj.sycl_queue)
     val_arr.get_pyobj()[0] = value
 
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_obj.sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef fptr_dpnp_initval_t func = <fptr_dpnp_initval_t > kernel_data.ptr
-    func(result.get_data(), val_arr.get_data(), result.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), val_arr.get_data(), result.size, NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -280,9 +331,17 @@ cdef utils.dpnp_descriptor call_fptr_1out(DPNPFuncName fptr_name,
     # Create result array with type given by FPTR data
     cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
 
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef fptr_1out_t func = <fptr_1out_t > kernel_data.ptr
     # Call FPTR function
-    func(result.get_data(), result.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), result.size, NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -320,9 +379,17 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out(DPNPFuncName fptr_name,
 
         result = out
 
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef fptr_1in_1out_t func = <fptr_1in_1out_t > kernel_data.ptr
 
-    func(x1.get_data(), result.get_data(), x1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -365,21 +432,31 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name,
 
         result = out
 
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
 
     """ Call FPTR function """
     cdef fptr_1in_1out_strides_t func = <fptr_1in_1out_strides_t > kernel_data.ptr
-    func(result.get_data(),
-         result.size,
-         result.ndim,
-         result_shape.data(),
-         result_strides.data(),
-         x1.get_data(),
-         x1.size,
-         x1.ndim,
-         x1_shape.data(),
-         x1_strides.data(),
-         NULL)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    result.size,
+                                                    result.ndim,
+                                                    result_shape.data(),
+                                                    result_strides.data(),
+                                                    x1.get_data(),
+                                                    x1.size,
+                                                    x1.ndim,
+                                                    x1_shape.data(),
+                                                    x1_strides.data(),
+                                                    NULL,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -424,18 +501,26 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out(DPNPFuncName fptr_name,
 
         result = out
 
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     """ Call FPTR function """
     cdef fptr_2in_1out_t func = <fptr_2in_1out_t > kernel_data.ptr
-    func(result.get_data(),
-         x1_obj.get_data(),
-         x1_obj.size,
-         x1_shape.data(),
-         x1_shape.size(),
-         x2_obj.get_data(),
-         x2_obj.size,
-         x2_shape.data(),
-         x2_shape.size(),
-         NULL)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    x1_obj.get_data(),
+                                                    x1_obj.size,
+                                                    x1_shape.data(),
+                                                    x1_shape.size(),
+                                                    x2_obj.get_data(),
+                                                    x2_obj.size,
+                                                    x2_shape.data(),
+                                                    x2_shape.size(),
+                                                    NULL,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -485,23 +570,33 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
 
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
 
+    result_obj = result.get_array()
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_obj.sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     """ Call FPTR function """
     cdef fptr_2in_1out_strides_t func = <fptr_2in_1out_strides_t > kernel_data.ptr
-    func(result.get_data(),
-         result.size,
-         result.ndim,
-         result_shape.data(),
-         result_strides.data(),
-         x1_obj.get_data(),
-         x1_obj.size,
-         x1_obj.ndim,
-         x1_shape.data(),
-         x1_strides.data(),
-         x2_obj.get_data(),
-         x2_obj.size,
-         x2_obj.ndim,
-         x2_shape.data(),
-         x2_strides.data(),
-         NULL)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    result.size,
+                                                    result.ndim,
+                                                    result_shape.data(),
+                                                    result_strides.data(),
+                                                    x1_obj.get_data(),
+                                                    x1_obj.size,
+                                                    x1_obj.ndim,
+                                                    x1_shape.data(),
+                                                    x1_strides.data(),
+                                                    x2_obj.get_data(),
+                                                    x2_obj.size,
+                                                    x2_obj.ndim,
+                                                    x2_shape.data(),
+                                                    x2_strides.data(),
+                                                    NULL,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
index 6217e31d2a9a..11912d2423cc 100644
--- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
@@ -58,47 +58,91 @@ __all__ += [
 ]
 
 
-ctypedef void(*custom_1in_1out_func_ptr_t)(void *, void * , const int , shape_elem_type * , shape_elem_type * , const size_t, const size_t)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                void *,
+                                                                void * ,
+                                                                const int ,
+                                                                shape_elem_type * ,
+                                                                shape_elem_type * ,
+                                                                const size_t,
+                                                                const size_t,
+                                                                const c_dpctl.DPCTLEventVectorRef)
 ctypedef void(*ftpr_custom_vander_1in_1out_t)(void * , void * , size_t, size_t, int)
-ctypedef void(*custom_arraycreation_1in_1out_func_ptr_t)(void *, const size_t, const size_t, const shape_elem_type*, const shape_elem_type*,
-                                                         void *, const size_t, const size_t, const shape_elem_type*, const shape_elem_type*,
-                                                         const shape_elem_type *, const size_t)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_arraycreation_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                              void *,
+                                                                              const size_t,
+                                                                              const size_t,
+                                                                              const shape_elem_type*,
+                                                                              const shape_elem_type*,
+                                                                              void *,
+                                                                              const size_t,
+                                                                              const size_t,
+                                                                              const shape_elem_type*,
+                                                                              const shape_elem_type*,
+                                                                              const shape_elem_type *,
+                                                                              const size_t,
+                                                                              const c_dpctl.DPCTLEventVectorRef)
 ctypedef void(*custom_indexing_1out_func_ptr_t)(void * , const size_t , const size_t , const int)
-ctypedef void(*fptr_dpnp_eye_t)(void *, int , const shape_elem_type * )
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_eye_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                     void *, int , const shape_elem_type * ,
+                                                     const c_dpctl.DPCTLEventVectorRef)
 ctypedef void(*fptr_dpnp_trace_t)(const void *, void * , const shape_elem_type * , const size_t)
 
 
 cpdef utils.dpnp_descriptor dpnp_copy(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_COPY, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_COPY_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_diag(utils.dpnp_descriptor v, int k):
     cdef shape_type_c input_shape = v.shape
+    cdef shape_type_c result_shape  # assigned in both branches below, then used for allocation and passed to the kernel
 
     if v.ndim == 1:
         n = v.shape[0] + abs(k)
 
-        shape_result = (n, n)
+        result_shape = (n, n)
     else:
         n = min(v.shape[0], v.shape[0] + k, v.shape[1], v.shape[1] - k)
         if n < 0:
             n = 0
 
-        shape_result = (n, )
+        result_shape = (n, )
 
-    result_obj = dpnp.zeros(shape_result, dtype=v.dtype)  # TODO need to call dpnp_zero instead
+    v_obj = v.get_array()  # the result is allocated with the same device, usm_type and queue as v
+    
+    result_obj = dpnp_container.empty(result_shape,  # NOTE(review): the removed code used dpnp.zeros; confirm the DIAG kernel writes every element (not only the diagonal) or this leaves uninitialized values
+                                      dtype=v.dtype,
+                                      device=v_obj.sycl_device,
+                                      usm_type=v_obj.usm_type,
+                                      sycl_queue=v_obj.sycl_queue)
     cdef utils.dpnp_descriptor result = dpnp_descriptor(result_obj)
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(v.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAG, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAG_EXT, param1_type, param1_type)  # _EXT entry is called below with a queue ref and returns an event ref
 
     result_type = dpnp_DPNPFuncType_to_dtype(< size_t > kernel_data.return_type)
 
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue  # typed as the dpctl extension class to obtain the C-level queue handle
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef custom_1in_1out_func_ptr_t func = <custom_1in_1out_func_ptr_t > kernel_data.ptr
-    cdef shape_type_c result_shape = result.shape
 
-    func(v.get_data(), result.get_data(), k, input_shape.data(), result_shape.data(), v.ndim, result.ndim)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    v.get_data(),
+                                                    result.get_data(),
+                                                    k,
+                                                    input_shape.data(),
+                                                    result_shape.data(),
+                                                    v.ndim,
+                                                    result.ndim,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)  # wait on the returned event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # release the event reference once the wait has completed
 
     return result
 
@@ -112,15 +156,23 @@ cpdef utils.dpnp_descriptor dpnp_eye(N, M=None, k=0, dtype=None):
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EYE, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EYE_EXT, param1_type, param1_type)
 
     cdef utils.dpnp_descriptor result = utils.create_output_descriptor((N, M), kernel_data.return_type, None)
 
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef fptr_dpnp_eye_t func = <fptr_dpnp_eye_t > kernel_data.ptr
 
     cdef shape_type_c result_shape = result.shape
 
-    func(result.get_data(), k, result_shape.data())
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), k, result_shape.data(), NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -130,20 +182,34 @@ cpdef utils.dpnp_descriptor dpnp_full(result_shape, value_in, result_dtype):
     cdef DPNPFuncType dtype_in = dpnp_dtype_to_DPNPFuncType(result_dtype)
 
     # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FULL, dtype_in, DPNP_FT_NONE)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FULL_EXT, dtype_in, DPNP_FT_NONE)
+
+    # create result array with type given by FPTR data
+    cdef shape_type_c result_shape_c = utils._object_to_tuple(result_shape)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape_c, kernel_data.return_type, None)
+
+    result_obj = result.get_array()
 
     # Create single-element input fill array with type given by FPTR data
     cdef shape_type_c shape_in = (1,)
-    cdef utils.dpnp_descriptor array_fill = utils.create_output_descriptor(shape_in, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor array_fill = utils.create_output_descriptor(shape_in,
+                                                                           kernel_data.return_type,
+                                                                           None,
+                                                                           device=result_obj.sycl_device,
+                                                                           usm_type=result_obj.usm_type,
+                                                                           sycl_queue=result_obj.sycl_queue)
     array_fill.get_pyobj()[0] = value_in
 
-    # ceate result array with type given by FPTR data
-    cdef shape_type_c result_shape_c = utils._object_to_tuple(result_shape)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape_c, kernel_data.return_type, None)
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_obj.sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_1in_1out_t func = <fptr_1in_1out_t > kernel_data.ptr
     # Call FPTR function
-    func(array_fill.get_data(), result.get_data(), result.size)
+
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array_fill.get_data(), result.get_data(), result.size, NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -153,20 +219,33 @@ cpdef utils.dpnp_descriptor dpnp_full_like(result_shape, value_in, result_dtype)
     cdef DPNPFuncType dtype_in = dpnp_dtype_to_DPNPFuncType(result_dtype)
 
     # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FULL_LIKE, dtype_in, DPNP_FT_NONE)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FULL_LIKE_EXT, dtype_in, DPNP_FT_NONE)
+
+    # create result array with type given by FPTR data
+    cdef shape_type_c result_shape_c = utils._object_to_tuple(result_shape)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape_c, kernel_data.return_type, None)
+
+    result_obj = result.get_array()
 
     # Create single-element input fill array with type given by FPTR data
     cdef shape_type_c shape_in = (1,)
-    cdef utils.dpnp_descriptor array_fill = utils.create_output_descriptor(shape_in, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor array_fill = utils.create_output_descriptor(shape_in,
+                                                                           kernel_data.return_type,
+                                                                           None,
+                                                                           device=result_obj.sycl_device,
+                                                                           usm_type=result_obj.usm_type,
+                                                                           sycl_queue=result_obj.sycl_queue)
     array_fill.get_pyobj()[0] = value_in
 
-    # ceate result array with type given by FPTR data
-    cdef shape_type_c result_shape_c = utils._object_to_tuple(result_shape)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape_c, kernel_data.return_type, None)
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_obj.sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_1in_1out_t func = <fptr_1in_1out_t > kernel_data.ptr
     # Call FPTR function
-    func(array_fill.get_data(), result.get_data(), result.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array_fill.get_data(), result.get_data(), result.size, NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -202,13 +281,21 @@ cpdef utils.dpnp_descriptor dpnp_geomspace(start, stop, num, endpoint, dtype, ax
 cpdef utils.dpnp_descriptor dpnp_identity(n, result_dtype):
     cdef DPNPFuncType dtype_in = dpnp_dtype_to_DPNPFuncType(result_dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_IDENTITY, dtype_in, DPNP_FT_NONE)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_IDENTITY_EXT, dtype_in, DPNP_FT_NONE)
 
     cdef shape_type_c shape_in = (n, n)
     cdef utils.dpnp_descriptor result = utils.create_output_descriptor(shape_in, kernel_data.return_type, None)
 
+    result_sycl_queue = result.get_array().sycl_queue  # the kernel is submitted to the queue the result array was allocated with
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef fptr_1out_t func = <fptr_1out_t > kernel_data.ptr
-    func(result.get_data(), n)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), n, NULL)  # passes n (side of the n x n result), not result.size; NULL = no dependency events
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)  # wait on the returned event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # release the event reference once the wait has completed
 
     return result
 
@@ -293,11 +380,11 @@ cpdef list dpnp_meshgrid(xi, copy, sparse, indexing):
 
 
 cpdef utils.dpnp_descriptor dpnp_ones(result_shape, result_dtype):
-    return call_fptr_1out(DPNP_FN_ONES, utils._object_to_tuple(result_shape), result_dtype)
+    return call_fptr_1out(DPNP_FN_ONES_EXT, utils._object_to_tuple(result_shape), result_dtype)
 
 
 cpdef utils.dpnp_descriptor dpnp_ones_like(result_shape, result_dtype):
-    return call_fptr_1out(DPNP_FN_ONES_LIKE, utils._object_to_tuple(result_shape), result_dtype)
+    return call_fptr_1out(DPNP_FN_ONES_LIKE_EXT, utils._object_to_tuple(result_shape), result_dtype)
 
 
 cpdef dpnp_ptp(utils.dpnp_descriptor arr, axis=None):
@@ -330,11 +417,16 @@ cpdef dpnp_ptp(utils.dpnp_descriptor arr, axis=None):
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PTP, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PTP_EXT, param1_type, param1_type)  # _EXT entry: invoked below with a queue ref, returns an event ref
 
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, kernel_data.return_type, None)
+    arr_obj = arr.get_array()  # input array object; supplies device/usm_type/queue for the result
 
-    cdef custom_arraycreation_1in_1out_func_ptr_t func = <custom_arraycreation_1in_1out_func_ptr_t > kernel_data.ptr
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=arr_obj.sycl_device,
+                                                                       usm_type=arr_obj.usm_type,
+                                                                       sycl_queue=arr_obj.sycl_queue)  # result descriptor is created with the input's device, USM type and queue
 
     cdef shape_type_c axis1
     cdef Py_ssize_t axis_size = 0
@@ -351,8 +443,29 @@ cpdef dpnp_ptp(utils.dpnp_descriptor arr, axis=None):
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result.shape)
     cdef shape_type_c arr_strides = utils.strides_to_vector(arr.strides, arr.shape)
 
-    func(result.get_data(), result.size, result.ndim, output_shape.data(), result_strides.data(),
-         arr.get_data(), arr.size, arr.ndim, shape_arr.data(), arr_strides.data(), axis2.data(), axis_size)
+    result_sycl_queue = result.get_array().sycl_queue  # queue attached to the result array created above
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # C-level queue handle handed to the kernel entry as its first argument
+
+    cdef custom_arraycreation_1in_1out_func_ptr_t func = <custom_arraycreation_1in_1out_func_ptr_t > kernel_data.ptr
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    result.size,
+                                                    result.ndim,
+                                                    output_shape.data(),
+                                                    result_strides.data(),
+                                                    arr.get_data(),
+                                                    arr.size,
+                                                    arr.ndim,
+                                                    shape_arr.data(),
+                                                    arr_strides.data(),
+                                                    axis2.data(),
+                                                    axis_size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)  # wait on the returned event with the GIL released, so `result` is returned only after the wait
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # delete the event ref once the wait has returned
 
     return result
 
@@ -414,13 +527,36 @@ cpdef utils.dpnp_descriptor dpnp_tril(utils.dpnp_descriptor m, int k):
         result_shape = m.shape
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype)
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIL, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIL_EXT, param1_type, param1_type)  # _EXT entry: invoked below with a queue ref, returns an event ref
+
+    m_obj = m.get_array()  # input array object; supplies device/usm_type/queue for the result
 
     # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=m_obj.sycl_device,
+                                                                       usm_type=m_obj.usm_type,
+                                                                       sycl_queue=m_obj.sycl_queue)  # result descriptor is created with the input's device, USM type and queue
+
+    result_sycl_queue = result.get_array().sycl_queue  # queue attached to the result array created above
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # C-level queue handle handed to the kernel entry as its first argument
 
     cdef custom_1in_1out_func_ptr_t func = <custom_1in_1out_func_ptr_t > kernel_data.ptr
-    func(m.get_data(), result.get_data(), k, input_shape.data(), result_shape.data(), m.ndim, result.ndim)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    m.get_data(),
+                                                    result.get_data(),
+                                                    k,
+                                                    input_shape.data(),
+                                                    result_shape.data(),
+                                                    m.ndim,
+                                                    result.ndim,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)  # wait on the returned event with the GIL released, so `result` is returned only after the wait
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # delete the event ref once the wait has returned
 
     return result
 
@@ -435,13 +571,36 @@ cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k):
         result_shape = m.shape
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype)
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIU, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIU_EXT, param1_type, param1_type)  # _EXT entry: invoked below with a queue ref, returns an event ref
+
+    m_obj = m.get_array()  # input array object; supplies device/usm_type/queue for the result
 
     # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=m_obj.sycl_device,
+                                                                       usm_type=m_obj.usm_type,
+                                                                       sycl_queue=m_obj.sycl_queue)  # result descriptor is created with the input's device, USM type and queue
+
+    result_sycl_queue = result.get_array().sycl_queue  # queue attached to the result array created above
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # C-level queue handle handed to the kernel entry as its first argument
 
     cdef custom_1in_1out_func_ptr_t func = <custom_1in_1out_func_ptr_t > kernel_data.ptr
-    func(m.get_data(), result.get_data(), k, input_shape.data(), result_shape.data(), m.ndim, result.ndim)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    m.get_data(),
+                                                    result.get_data(),
+                                                    k,
+                                                    input_shape.data(),
+                                                    result_shape.data(),
+                                                    m.ndim,
+                                                    result.ndim,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)  # wait on the returned event with the GIL released, so `result` is returned only after the wait
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # delete the event ref once the wait has returned
 
     return result
 
@@ -461,8 +620,8 @@ cpdef utils.dpnp_descriptor dpnp_vander(utils.dpnp_descriptor x1, int N, int inc
 
 
 cpdef utils.dpnp_descriptor dpnp_zeros(result_shape, result_dtype):
-    return call_fptr_1out(DPNP_FN_ZEROS, utils._object_to_tuple(result_shape), result_dtype)
+    return call_fptr_1out(DPNP_FN_ZEROS_EXT, utils._object_to_tuple(result_shape), result_dtype)
 
 
 cpdef utils.dpnp_descriptor dpnp_zeros_like(result_shape, result_dtype):
-    return call_fptr_1out(DPNP_FN_ZEROS_LIKE, utils._object_to_tuple(result_shape), result_dtype)
+    return call_fptr_1out(DPNP_FN_ZEROS_LIKE_EXT, utils._object_to_tuple(result_shape), result_dtype)
diff --git a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
index b09304033098..482f00c2c71d 100644
--- a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
@@ -49,7 +49,7 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_and(utils.dpnp_descriptor x1_obj,
                                              object dtype=None,
                                              utils.dpnp_descriptor out=None,
                                              object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_AND, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
+    return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_AND_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
 
 
 cpdef utils.dpnp_descriptor dpnp_bitwise_or(utils.dpnp_descriptor x1_obj,
@@ -57,7 +57,7 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_or(utils.dpnp_descriptor x1_obj,
                                             object dtype=None,
                                             utils.dpnp_descriptor out=None,
                                             object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_OR, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
+    return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_OR_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
 
 
 cpdef utils.dpnp_descriptor dpnp_bitwise_xor(utils.dpnp_descriptor x1_obj,
@@ -65,11 +65,11 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_xor(utils.dpnp_descriptor x1_obj,
                                              object dtype=None,
                                              utils.dpnp_descriptor out=None,
                                              object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_XOR, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
+    return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_XOR_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
 
 
 cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr):
-    return call_fptr_1in_1out(DPNP_FN_INVERT, arr, arr.shape)
+    return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape)
 
 
 cpdef utils.dpnp_descriptor dpnp_left_shift(utils.dpnp_descriptor x1_obj,
@@ -77,11 +77,11 @@ cpdef utils.dpnp_descriptor dpnp_left_shift(utils.dpnp_descriptor x1_obj,
                                             object dtype=None,
                                             utils.dpnp_descriptor out=None,
                                             object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_LEFT_SHIFT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
+    return call_fptr_2in_1out_strides(DPNP_FN_LEFT_SHIFT_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
 
 cpdef utils.dpnp_descriptor dpnp_right_shift(utils.dpnp_descriptor x1_obj,
                                              utils.dpnp_descriptor x2_obj,
                                              object dtype=None,
                                              utils.dpnp_descriptor out=None,
                                              object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_RIGHT_SHIFT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
+    return call_fptr_2in_1out_strides(DPNP_FN_RIGHT_SHIFT_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
diff --git a/dpnp/dpnp_algo/dpnp_algo_counting.pyx b/dpnp/dpnp_algo/dpnp_algo_counting.pyx
index fa0f1e53b218..119c0d27b692 100644
--- a/dpnp/dpnp_algo/dpnp_algo_counting.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_counting.pyx
@@ -40,4 +40,4 @@ __all__ += [
 
 
 cpdef utils.dpnp_descriptor dpnp_count_nonzero(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out(DPNP_FN_COUNT_NONZERO, x1, (1,))
+    return call_fptr_1in_1out(DPNP_FN_COUNT_NONZERO_EXT, x1, (1,))
diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
index 0303d5a3498a..f035761f12a2 100644
--- a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
@@ -99,21 +99,31 @@ cpdef dpnp_copyto(utils.dpnp_descriptor dst, utils.dpnp_descriptor src, where=Tr
     cdef shape_type_c src_strides = utils.strides_to_vector(src.strides, src_shape)
 
     # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COPYTO, src_type, dst_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COPYTO_EXT, src_type, dst_type)  # _EXT entry: invoked below with a queue ref, returns an event ref
+
+    _, _, result_sycl_queue = utils.get_common_usm_allocation(dst, src)  # only the third element (the queue) of the dst/src common allocation is used; no new array is created here
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # C-level queue handle handed to the kernel entry as its first argument
 
     # Call FPTR function
     cdef fptr_1in_1out_strides_t func = <fptr_1in_1out_strides_t > kernel_data.ptr
-    func(dst.get_data(),
-         dst.size,
-         dst.ndim,
-         dst_shape.data(),
-         dst_strides.data(),
-         src.get_data(),
-         src.size,
-         src.ndim,
-         src_shape.data(),
-         src_strides.data(),
-         NULL)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    dst.get_data(),
+                                                    dst.size,
+                                                    dst.ndim,
+                                                    dst_shape.data(),
+                                                    dst_strides.data(),
+                                                    src.get_data(),
+                                                    src.size,
+                                                    src.ndim,
+                                                    src_shape.data(),
+                                                    src_strides.data(),
+                                                    NULL,  # NOTE(review): same trailing NULL the pre-_EXT call passed; its meaning is not visible here - confirm against fptr_1in_1out_strides_t
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)  # wait on the returned event with the GIL released before the function returns
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # delete the event ref once the wait has returned
 
 
 cpdef utils.dpnp_descriptor dpnp_expand_dims(utils.dpnp_descriptor in_array, axis):
diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
index 2e52c32c10a2..b2c070d95b47 100644
--- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
@@ -105,7 +105,7 @@ cpdef utils.dpnp_descriptor dpnp_add(utils.dpnp_descriptor x1_obj,
                                      object dtype=None,
                                      utils.dpnp_descriptor out=None,
                                      object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_ADD, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out_strides(DPNP_FN_ADD_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_arctan2(utils.dpnp_descriptor x1_obj,
@@ -113,7 +113,7 @@ cpdef utils.dpnp_descriptor dpnp_arctan2(utils.dpnp_descriptor x1_obj,
                                          object dtype=None,
                                          utils.dpnp_descriptor out=None,
                                          object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_ARCTAN2, x1_obj, x2_obj, dtype, out, where, func_name="arctan2")
+    return call_fptr_2in_1out_strides(DPNP_FN_ARCTAN2_EXT, x1_obj, x2_obj, dtype, out, where, func_name="arctan2")
 
 
 cpdef utils.dpnp_descriptor dpnp_around(utils.dpnp_descriptor x1, int decimals):
@@ -134,11 +134,11 @@ cpdef utils.dpnp_descriptor dpnp_around(utils.dpnp_descriptor x1, int decimals):
 
 
 cpdef utils.dpnp_descriptor dpnp_ceil(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_CEIL, x1, dtype=None, out=out, where=True, func_name='ceil')
+    return call_fptr_1in_1out_strides(DPNP_FN_CEIL_EXT, x1, dtype=None, out=out, where=True, func_name='ceil')
 
 
 cpdef utils.dpnp_descriptor dpnp_conjugate(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_CONJIGUATE, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_CONJIGUATE_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_copysign(utils.dpnp_descriptor x1_obj,
@@ -146,7 +146,7 @@ cpdef utils.dpnp_descriptor dpnp_copysign(utils.dpnp_descriptor x1_obj,
                                           object dtype=None,
                                           utils.dpnp_descriptor out=None,
                                           object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_COPYSIGN, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out_strides(DPNP_FN_COPYSIGN_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_cross(utils.dpnp_descriptor x1_obj,
@@ -154,7 +154,7 @@ cpdef utils.dpnp_descriptor dpnp_cross(utils.dpnp_descriptor x1_obj,
                                        object dtype=None,
                                        utils.dpnp_descriptor out=None,
                                        object where=True):
-    return call_fptr_2in_1out(DPNP_FN_CROSS, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out(DPNP_FN_CROSS_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_cumprod(utils.dpnp_descriptor x1):
@@ -166,7 +166,7 @@ cpdef utils.dpnp_descriptor dpnp_cumprod(utils.dpnp_descriptor x1):
     # >>> res.shape
     # (4,)
 
-    return call_fptr_1in_1out(DPNP_FN_CUMPROD, x1, (x1.size,))
+    return call_fptr_1in_1out(DPNP_FN_CUMPROD_EXT, x1, (x1.size,))
 
 
 cpdef utils.dpnp_descriptor dpnp_cumsum(utils.dpnp_descriptor x1):
@@ -178,7 +178,7 @@ cpdef utils.dpnp_descriptor dpnp_cumsum(utils.dpnp_descriptor x1):
     # >>> res.shape
     # (4,)
 
-    return call_fptr_1in_1out(DPNP_FN_CUMSUM, x1, (x1.size,))
+    return call_fptr_1in_1out(DPNP_FN_CUMSUM_EXT, x1, (x1.size,))
 
 
 cpdef utils.dpnp_descriptor dpnp_diff(utils.dpnp_descriptor x1, int n):
@@ -203,7 +203,7 @@ cpdef utils.dpnp_descriptor dpnp_divide(utils.dpnp_descriptor x1_obj,
                                         object dtype=None,
                                         utils.dpnp_descriptor out=None,
                                         object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_DIVIDE, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out_strides(DPNP_FN_DIVIDE_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1):
@@ -215,7 +215,7 @@ cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
     # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EDIFF1D, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EDIFF1D_EXT, param1_type, param1_type)  # _EXT entry: invoked below with a queue ref, returns an event ref
 
     result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type)
 
@@ -223,33 +223,51 @@ cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1):
     cdef shape_type_c x1_shape = (x1.size,)
     cdef shape_type_c x1_strides = utils.strides_to_vector(None, x1_shape)
 
+    x1_obj = x1.get_array()  # input array object; supplies device/usm_type/queue for the result
+
     cdef shape_type_c result_shape = (x1.size - 1,)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)  # result descriptor is created with the input's device, USM type and queue
+
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
 
+    result_sycl_queue = result.get_array().sycl_queue  # queue attached to the result array created above
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # C-level queue handle handed to the kernel entry as its first argument
+
     # Call FPTR function
     cdef fptr_1in_1out_strides_t func = <fptr_1in_1out_strides_t > kernel_data.ptr
-    func(result.get_data(),
-         result.size,
-         result.ndim,
-         result_shape.data(),
-         result_strides.data(),
-         x1.get_data(),
-         x1.size,
-         x1.ndim,
-         x1_shape.data(),
-         x1_strides.data(),
-         NULL)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    result.size,
+                                                    result.ndim,
+                                                    result_shape.data(),
+                                                    result_strides.data(),
+                                                    x1.get_data(),
+                                                    x1.size,
+                                                    x1.ndim,
+                                                    x1_shape.data(),
+                                                    x1_strides.data(),
+                                                    NULL,  # NOTE(review): same trailing NULL the pre-_EXT call passed; its meaning is not visible here - confirm against fptr_1in_1out_strides_t
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)  # wait on the returned event with the GIL released, so `result` is returned only after the wait
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # delete the event ref once the wait has returned
 
     return result
 
 
 cpdef utils.dpnp_descriptor dpnp_fabs(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_FABS, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_FABS_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_floor(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_FLOOR, x1, dtype=None, out=out, where=True, func_name='floor')
+    return call_fptr_1in_1out_strides(DPNP_FN_FLOOR_EXT, x1, dtype=None, out=out, where=True, func_name='floor')
 
 
 cpdef utils.dpnp_descriptor dpnp_floor_divide(utils.dpnp_descriptor x1_obj,
@@ -257,7 +275,7 @@ cpdef utils.dpnp_descriptor dpnp_floor_divide(utils.dpnp_descriptor x1_obj,
                                               object dtype=None,
                                               utils.dpnp_descriptor out=None,
                                               object where=True):
-    return call_fptr_2in_1out(DPNP_FN_FLOOR_DIVIDE, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out(DPNP_FN_FLOOR_DIVIDE_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_fmod(utils.dpnp_descriptor x1_obj,
@@ -265,7 +283,7 @@ cpdef utils.dpnp_descriptor dpnp_fmod(utils.dpnp_descriptor x1_obj,
                                       object dtype=None,
                                       utils.dpnp_descriptor out=None,
                                       object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_FMOD, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out_strides(DPNP_FN_FMOD_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_gradient(utils.dpnp_descriptor y1, int dx=1):
@@ -296,7 +314,7 @@ cpdef utils.dpnp_descriptor dpnp_hypot(utils.dpnp_descriptor x1_obj,
                                        object dtype=None,
                                        utils.dpnp_descriptor out=None,
                                        object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_HYPOT, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out_strides(DPNP_FN_HYPOT_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_maximum(utils.dpnp_descriptor x1_obj,
@@ -304,7 +322,7 @@ cpdef utils.dpnp_descriptor dpnp_maximum(utils.dpnp_descriptor x1_obj,
                                          object dtype=None,
                                          utils.dpnp_descriptor out=None,
                                          object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_MAXIMUM, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out_strides(DPNP_FN_MAXIMUM_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_minimum(utils.dpnp_descriptor x1_obj,
@@ -312,7 +330,7 @@ cpdef utils.dpnp_descriptor dpnp_minimum(utils.dpnp_descriptor x1_obj,
                                          object dtype=None,
                                          utils.dpnp_descriptor out=None,
                                          object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_MINIMUM, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out_strides(DPNP_FN_MINIMUM_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef tuple dpnp_modf(utils.dpnp_descriptor x1):
@@ -339,7 +357,7 @@ cpdef utils.dpnp_descriptor dpnp_multiply(utils.dpnp_descriptor x1_obj,
                                           object dtype=None,
                                           utils.dpnp_descriptor out=None,
                                           object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_MULTIPLY, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out_strides(DPNP_FN_MULTIPLY_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_nancumprod(utils.dpnp_descriptor x1):
@@ -397,7 +415,7 @@ cpdef utils.dpnp_descriptor dpnp_nansum(utils.dpnp_descriptor x1):
 
 
 cpdef utils.dpnp_descriptor dpnp_negative(dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_NEGATIVE, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_NEGATIVE_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_power(utils.dpnp_descriptor x1_obj,
@@ -405,7 +423,7 @@ cpdef utils.dpnp_descriptor dpnp_power(utils.dpnp_descriptor x1_obj,
                                        object dtype=None,
                                        utils.dpnp_descriptor out=None,
                                        object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_POWER, x1_obj, x2_obj, dtype, out, where, func_name="power")
+    return call_fptr_2in_1out_strides(DPNP_FN_POWER_EXT, x1_obj, x2_obj, dtype, out, where, func_name="power")
 
 
 cpdef utils.dpnp_descriptor dpnp_prod(utils.dpnp_descriptor input,
@@ -452,11 +470,11 @@ cpdef utils.dpnp_descriptor dpnp_remainder(utils.dpnp_descriptor x1_obj,
                                            object dtype=None,
                                            utils.dpnp_descriptor out=None,
                                            object where=True):
-    return call_fptr_2in_1out(DPNP_FN_REMAINDER, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out(DPNP_FN_REMAINDER_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_sign(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_SIGN, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_SIGN_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_subtract(utils.dpnp_descriptor x1_obj,
@@ -464,7 +482,7 @@ cpdef utils.dpnp_descriptor dpnp_subtract(utils.dpnp_descriptor x1_obj,
                                           object dtype=None,
                                           utils.dpnp_descriptor out=None,
                                           object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_SUBTRACT, x1_obj, x2_obj, dtype, out, where)
+    return call_fptr_2in_1out_strides(DPNP_FN_SUBTRACT_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
 cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor input,
@@ -514,4 +532,4 @@ cpdef utils.dpnp_descriptor dpnp_trapz(utils.dpnp_descriptor y1, utils.dpnp_desc
 
 
 cpdef utils.dpnp_descriptor dpnp_trunc(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_TRUNC, x1, dtype=None, out=out, where=True, func_name='trunc')
+    return call_fptr_1in_1out_strides(DPNP_FN_TRUNC_EXT, x1, dtype=None, out=out, where=True, func_name='trunc')
diff --git a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx b/dpnp/dpnp_algo/dpnp_algo_sorting.pyx
index 2fbb37d52df7..93a76e81a18b 100644
--- a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_sorting.pyx
@@ -50,7 +50,7 @@ cpdef utils.dpnp_descriptor dpnp_argsort(utils.dpnp_descriptor x1):
     cdef shape_type_c result_shape = x1.shape
     if result_shape == ():
         result_shape = (1,)
-    return call_fptr_1in_1out(DPNP_FN_ARGSORT, x1, result_shape)
+    return call_fptr_1in_1out(DPNP_FN_ARGSORT_EXT, x1, result_shape)
 
 
 cpdef utils.dpnp_descriptor dpnp_partition(utils.dpnp_descriptor arr, int kth, axis=-1, kind='introselect', order=None):
@@ -92,4 +92,4 @@ cpdef utils.dpnp_descriptor dpnp_searchsorted(utils.dpnp_descriptor arr, utils.d
 
 
 cpdef utils.dpnp_descriptor dpnp_sort(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out(DPNP_FN_SORT, x1, x1.shape)
+    return call_fptr_1in_1out(DPNP_FN_SORT_EXT, x1, x1.shape)
diff --git a/dpnp/dpnp_algo/dpnp_algo_special.pyx b/dpnp/dpnp_algo/dpnp_algo_special.pyx
index a4a06613efb7..fb6ff0d74cc6 100644
--- a/dpnp/dpnp_algo/dpnp_algo_special.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_special.pyx
@@ -40,4 +40,4 @@ __all__ += [
 
 
 cpdef utils.dpnp_descriptor dpnp_erf(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_ERF, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_ERF_EXT, x1)
diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
index 596b9c4e853a..5ff4c785d96f 100644
--- a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
@@ -103,16 +103,39 @@ cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_
     cdef shape_type_c x1_shape = x1.shape
     cdef shape_type_c x2_shape = x2.shape
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CORRELATE, param1_type, param2_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CORRELATE_EXT, param1_type, param2_type)  # _EXT entry: invoked below with a queue ref, returns an event ref
+
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1, x2)  # device, USM type and queue derived from both inputs; used for the result and for the kernel call
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = (1,)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=result_sycl_device,
+                                                                       usm_type=result_usm_type,
+                                                                       sycl_queue=result_sycl_queue)  # result descriptor is created with the common device, USM type and queue of x1 and x2
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # C-level queue handle handed to the kernel entry as its first argument
 
     cdef fptr_2in_1out_t func = <fptr_2in_1out_t > kernel_data.ptr
 
-    func(result.get_data(), x1.get_data(), x1.size, x1_shape.data(), x1_shape.size(),
-         x2.get_data(), x2.size, x2_shape.data(), x2_shape.size(), NULL)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    x1.get_data(),
+                                                    x1.size,
+                                                    x1_shape.data(),
+                                                    x1_shape.size(),
+                                                    x2.get_data(),
+                                                    x2.size,
+                                                    x2_shape.data(),
+                                                    x2_shape.size(),
+                                                    NULL,  # NOTE(review): same trailing NULL the pre-_EXT call passed; its meaning is not visible here - confirm against fptr_2in_1out_t
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)  # wait on the returned event with the GIL released, so `result` is returned only after the wait
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # delete the event ref once the wait has returned
 
     return result
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
index 89644fc6d8d9..6edfb87042fb 100644
--- a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
@@ -65,103 +65,103 @@ __all__ += [
 
 
 cpdef utils.dpnp_descriptor dpnp_arccos(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_ARCCOS, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_ARCCOS_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_arccosh(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_ARCCOSH, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_ARCCOSH_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_arcsin(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_ARCSIN, x1, dtype=None, out=out, where=True, func_name='arcsin')
+    return call_fptr_1in_1out_strides(DPNP_FN_ARCSIN_EXT, x1, dtype=None, out=out, where=True, func_name='arcsin')
 
 
 cpdef utils.dpnp_descriptor dpnp_arcsinh(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_ARCSINH, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_ARCSINH_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_arctan(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_ARCTAN, x1, dtype=None, out=out, where=True, func_name='arctan')
+    return call_fptr_1in_1out_strides(DPNP_FN_ARCTAN_EXT, x1, dtype=None, out=out, where=True, func_name='arctan')
 
 
 cpdef utils.dpnp_descriptor dpnp_arctanh(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_ARCTANH, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_ARCTANH_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_cbrt(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_CBRT, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_CBRT_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_cos(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_COS, x1, dtype=None, out=out, where=True, func_name='cos')
+    return call_fptr_1in_1out_strides(DPNP_FN_COS_EXT, x1, dtype=None, out=out, where=True, func_name='cos')
 
 
 cpdef utils.dpnp_descriptor dpnp_cosh(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_COSH, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_COSH_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_degrees(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_DEGREES, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_DEGREES_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_exp(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_EXP, x1, dtype=None, out=out, where=True, func_name='exp')
+    return call_fptr_1in_1out_strides(DPNP_FN_EXP_EXT, x1, dtype=None, out=out, where=True, func_name='exp')
 
 
 cpdef utils.dpnp_descriptor dpnp_exp2(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_EXP2, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_EXP2_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_expm1(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_EXPM1, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_EXPM1_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_log(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_LOG, x1, dtype=None, out=out, where=True, func_name='log')
+    return call_fptr_1in_1out_strides(DPNP_FN_LOG_EXT, x1, dtype=None, out=out, where=True, func_name='log')
 
 
 cpdef utils.dpnp_descriptor dpnp_log10(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_LOG10, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_LOG10_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_log1p(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_LOG1P, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_LOG1P_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_log2(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_LOG2, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_LOG2_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_recip(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_RECIP, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_RECIP_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_radians(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_RADIANS, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_RADIANS_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_sin(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_SIN, x1, dtype=None, out=out, where=True, func_name='sin')
+    return call_fptr_1in_1out_strides(DPNP_FN_SIN_EXT, x1, dtype=None, out=out, where=True, func_name='sin')
 
 
 cpdef utils.dpnp_descriptor dpnp_sinh(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_SINH, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_SINH_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_sqrt(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_SQRT, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_SQRT_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_square(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_SQUARE, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_SQUARE_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_tan(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
-    return call_fptr_1in_1out_strides(DPNP_FN_TAN, x1, dtype=None, out=out, where=True, func_name='tan')
+    return call_fptr_1in_1out_strides(DPNP_FN_TAN_EXT, x1, dtype=None, out=out, where=True, func_name='tan')
 
 
 cpdef utils.dpnp_descriptor dpnp_tanh(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_TANH, x1)
+    return call_fptr_1in_1out_strides(DPNP_FN_TANH_EXT, x1)
 
 
 cpdef utils.dpnp_descriptor dpnp_unwrap(utils.dpnp_descriptor array1):

From d7783af49ebaab2588be3d6e683fcea0f38bef40 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Fri, 4 Feb 2022 04:42:29 -0600
Subject: [PATCH 02/18] Extend main cython funcs with queues and events (part
 2)

---
 dpnp/backend/include/dpnp_iface.hpp           |   4 +-
 .../kernels/dpnp_krnl_arraycreation.cpp       |  15 +-
 dpnp/backend/kernels/dpnp_krnl_indexing.cpp   |   9 +-
 dpnp/backend/kernels/dpnp_krnl_linalg.cpp     |   4 +-
 dpnp/backend/kernels/dpnp_krnl_logic.cpp      |  12 +-
 .../kernels/dpnp_krnl_manipulation.cpp        |   4 +-
 .../kernels/dpnp_krnl_mathematical.cpp        |  12 +-
 dpnp/backend/kernels/dpnp_krnl_statistics.cpp |   8 +-
 dpnp/dpnp_algo/dpnp_algo.pxd                  |  11 +-
 dpnp/dpnp_algo/dpnp_algo.pyx                  |  18 +-
 dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx    |  99 ++++-
 dpnp/dpnp_algo/dpnp_algo_indexing.pyx         | 355 ++++++++++++++----
 dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx    | 115 ++++--
 dpnp/dpnp_algo/dpnp_algo_logic.pyx            | 189 ++++++++--
 dpnp/dpnp_algo/dpnp_algo_manipulation.pyx     | 114 ++++--
 dpnp/dpnp_algo/dpnp_algo_mathematical.pyx     | 256 ++++++++++---
 dpnp/dpnp_algo/dpnp_algo_searching.pyx        |  54 ++-
 dpnp/dpnp_algo/dpnp_algo_sorting.pyx          |  74 +++-
 dpnp/dpnp_algo/dpnp_algo_statistics.pyx       | 335 ++++++++++++-----
 dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx    |   9 +-
 20 files changed, 1325 insertions(+), 372 deletions(-)

diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp
index 5b02d087b340..42c05f0fd61d 100644
--- a/dpnp/backend/include/dpnp_iface.hpp
+++ b/dpnp/backend/include/dpnp_iface.hpp
@@ -533,7 +533,7 @@ INP_DLLEXPORT void dpnp_cumprod_c(void* array1_in, void* result1, size_t size);
  * @param [in]  q_ref               Reference to SYCL queue.
  * @param [in]  array1_in           Input array.
  * @param [out] result1             Output array.
- * @param [in]  size                 Number of elements in input arrays.
+ * @param [in]  size                Number of elements in input arrays.
  * @param [in]  dep_event_vec_ref   Reference to vector of SYCL events.
  *
  */
@@ -551,7 +551,7 @@ INP_DLLEXPORT void dpnp_cumsum_c(void* array1_in, void* result1, size_t size);
  * @ingroup BACKEND_API
  * @brief The differences between consecutive elements of an array.
  *
- * @param [in]  q_ref             Reference to SYCL queue.
+ * @param [in]  q_ref               Reference to SYCL queue.
  * @param [out] result_out          Output array.
  * @param [in]  result_size         Size of output array.
  * @param [in]  result_ndim         Number of output array dimensions.
diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
index 9f1b742c84e1..106036156739 100644
--- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
@@ -617,9 +617,6 @@ DPCTLSyclEventRef dpnp_vander_c(DPCTLSyclQueueRef q_ref,
                                 const int increasing,
                                 const DPCTLEventVectorRef dep_event_vec_ref)
 {
-    // avoid warning unused variable
-    (void)dep_event_vec_ref;
-
     DPCTLSyclEventRef event_ref = nullptr;
 
     if ((array1_in == nullptr) || (result1 == nullptr))
@@ -637,8 +634,7 @@ DPCTLSyclEventRef dpnp_vander_c(DPCTLSyclQueueRef q_ref,
 
     if (N == 1)
     {
-        dpnp_ones_c<_DataType_output>(result, size_in);
-        return event_ref;
+        return dpnp_ones_c<_DataType_output>(q_ref, result, size_in, dep_event_vec_ref);
     }
 
     if (increasing)
@@ -762,9 +758,10 @@ DPCTLSyclEventRef dpnp_trace_c(DPCTLSyclQueueRef q_ref,
     };
 
     auto event = q.submit(kernel_func);
-    event.wait();
 
-    return event_ref;
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
+
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType, typename _ResultType>
@@ -849,9 +846,9 @@ DPCTLSyclEventRef dpnp_tri_c(DPCTLSyclQueueRef q_ref,
 
     event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType>
diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
index e14e4d1c8b32..66da163c86b5 100644
--- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
@@ -87,9 +87,10 @@ DPCTLSyclEventRef dpnp_choose_c(DPCTLSyclQueueRef q_ref,
     };
 
     sycl::event event = q.submit(kernel_func);
-    event.wait();
 
-    return event_ref;
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
+
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType1, typename _DataType2>
@@ -917,9 +918,9 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref,
 
     sycl::event event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType, typename _IndecesType>
diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp
index fcc2a908343e..a56b4f751649 100644
--- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp
@@ -502,9 +502,9 @@ DPCTLSyclEventRef dpnp_kron_c(DPCTLSyclQueueRef q_ref,
 
     sycl::event event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType1, typename _DataType2, typename _ResultType>
diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
index cb323734aebf..109246913589 100644
--- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
@@ -81,9 +81,9 @@ DPCTLSyclEventRef dpnp_all_c(DPCTLSyclQueueRef q_ref,
 
     event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType, typename _ResultType>
@@ -166,9 +166,9 @@ DPCTLSyclEventRef dpnp_allclose_c(DPCTLSyclQueueRef q_ref,
 
     event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType1, typename _DataType2, typename _ResultType>
@@ -258,9 +258,9 @@ DPCTLSyclEventRef dpnp_any_c(DPCTLSyclQueueRef q_ref,
 
     event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType, typename _ResultType>
diff --git a/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp b/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp
index c87ab1bf12e7..8a122dbf7283 100644
--- a/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp
@@ -80,9 +80,9 @@ DPCTLSyclEventRef dpnp_repeat_c(DPCTLSyclQueueRef q_ref,
 
     event = q.submit(kernel_func);
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType>
diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
index bf69fce8e416..dc2493094a26 100644
--- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
@@ -84,9 +84,9 @@ DPCTLSyclEventRef dpnp_around_c(DPCTLSyclQueueRef q_ref,
         event = q.submit(kernel_func);
     }
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType>
@@ -170,9 +170,9 @@ DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref,
         event = q.submit(kernel_func);
     }
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType>
@@ -753,9 +753,9 @@ DPCTLSyclEventRef dpnp_modf_c(DPCTLSyclQueueRef q_ref,
         event = q.submit(kernel_func);
     }
 
-    event.wait();
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
 
-    return event_ref;
+    return DPCTLEvent_Copy(event_ref);
 }
 
 template <typename _DataType_input, typename _DataType_output>
diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp
index 70512c17f5f5..3e693473ca01 100644
--- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp
@@ -598,6 +598,10 @@ DPCTLSyclEventRef dpnp_mean_c(DPCTLSyclQueueRef q_ref,
         sycl::event event = mkl_stats::mean(q, dataset, result);
 
         event.wait();
+
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
+
+        return DPCTLEvent_Copy(event_ref);
     }
     else
     {
@@ -608,9 +612,9 @@ DPCTLSyclEventRef dpnp_mean_c(DPCTLSyclQueueRef q_ref,
         result[0] = sum[0] / static_cast<_ResultType>(size);
 
         sycl::free(sum, q);
-    }
 
-    return event_ref;
+        return event_ref;
+    }
 }
 
 template <typename _DataType, typename _ResultType>
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index a4f4156a8ed6..3360164830d9 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -438,7 +438,16 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQu
                                                              const long * ,
                                                              const c_dpctl.DPCTLEventVectorRef)
 ctypedef void(*fptr_blas_gemm_2in_1out_t)(void *, void * , void * , size_t, size_t, size_t)
-ctypedef void(*dpnp_reduction_c_t)(void *, const void * , const shape_elem_type*, const size_t, const shape_elem_type*, const size_t, const void * , const long*)
+ctypedef c_dpctl.DPCTLSyclEventRef(*dpnp_reduction_c_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                        void *,
+                                                        const void * ,
+                                                        const shape_elem_type*,
+                                                        const size_t,
+                                                        const shape_elem_type*,
+                                                        const size_t,
+                                                        const void * ,
+                                                        const long*,
+                                                        const c_dpctl.DPCTLEventVectorRef)
 
 cpdef dpnp_descriptor dpnp_astype(dpnp_descriptor x1, dtype)
 cpdef dpnp_descriptor dpnp_flatten(dpnp_descriptor x1)
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 3018776e596d..5f30dbffbd95 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -119,7 +119,7 @@ cpdef utils.dpnp_descriptor dpnp_arange(start, stop, step, dtype):
                                                     result.get_data(),
                                                     result.size,
                                                     NULL)  # dep_events_ref)
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -150,7 +150,7 @@ cpdef utils.dpnp_descriptor dpnp_astype(utils.dpnp_descriptor x1, dtype):
     cdef fptr_dpnp_astype_t func = <fptr_dpnp_astype_t > kernel_data.ptr
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -197,7 +197,7 @@ cpdef utils.dpnp_descriptor dpnp_flatten(utils.dpnp_descriptor x1):
                                                     NULL,
                                                     NULL)  # dep_events_ref
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -230,7 +230,7 @@ cpdef utils.dpnp_descriptor dpnp_init_val(shape, dtype, value):
     cdef fptr_dpnp_initval_t func = <fptr_dpnp_initval_t > kernel_data.ptr
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), val_arr.get_data(), result.size, NULL)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -340,7 +340,7 @@ cdef utils.dpnp_descriptor call_fptr_1out(DPNPFuncName fptr_name,
     # Call FPTR function
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), result.size, NULL)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -388,7 +388,7 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out(DPNPFuncName fptr_name,
 
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -455,7 +455,7 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name,
                                                     NULL,
                                                     NULL)  # dep_events_ref
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -519,7 +519,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out(DPNPFuncName fptr_name,
                                                     NULL,
                                                     NULL)  # dep_events_ref)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -596,7 +596,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
                                                     NULL,
                                                     NULL)  # dep_events_ref)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
index 11912d2423cc..dd6545d4e528 100644
--- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
@@ -67,7 +67,9 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyc
                                                                 const size_t,
                                                                 const size_t,
                                                                 const c_dpctl.DPCTLEventVectorRef)
-ctypedef void(*ftpr_custom_vander_1in_1out_t)(void * , void * , size_t, size_t, int)
+ctypedef c_dpctl.DPCTLSyclEventRef(*ftpr_custom_vander_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                   void * , void * , size_t, size_t, int,
+                                                                   const c_dpctl.DPCTLEventVectorRef)
 ctypedef c_dpctl.DPCTLSyclEventRef(*custom_arraycreation_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
                                                                               void *,
                                                                               const size_t,
@@ -82,11 +84,21 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_arraycreation_1in_1out_func_ptr_t)(c_
                                                                               const shape_elem_type *,
                                                                               const size_t,
                                                                               const c_dpctl.DPCTLEventVectorRef)
-ctypedef void(*custom_indexing_1out_func_ptr_t)(void * , const size_t , const size_t , const int)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                     void * ,
+                                                                     const size_t ,
+                                                                     const size_t ,
+                                                                     const int,
+                                                                     const c_dpctl.DPCTLEventVectorRef)
 ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_eye_t)(c_dpctl.DPCTLSyclQueueRef,
                                                      void *, int , const shape_elem_type * ,
                                                      const c_dpctl.DPCTLEventVectorRef)
-ctypedef void(*fptr_dpnp_trace_t)(const void *, void * , const shape_elem_type * , const size_t)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_trace_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                       const void *,
+                                                       void * ,
+                                                       const shape_elem_type * ,
+                                                       const size_t,
+                                                       const c_dpctl.DPCTLEventVectorRef)
 
 
 cpdef utils.dpnp_descriptor dpnp_copy(utils.dpnp_descriptor x1):
@@ -141,7 +153,7 @@ cpdef utils.dpnp_descriptor dpnp_diag(utils.dpnp_descriptor v, int k):
                                                     result.ndim,
                                                     NULL)  # dep_events_ref
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -171,7 +183,7 @@ cpdef utils.dpnp_descriptor dpnp_eye(N, M=None, k=0, dtype=None):
 
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), k, result_shape.data(), NULL)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -208,7 +220,7 @@ cpdef utils.dpnp_descriptor dpnp_full(result_shape, value_in, result_dtype):
 
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array_fill.get_data(), result.get_data(), result.size, NULL)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -244,7 +256,7 @@ cpdef utils.dpnp_descriptor dpnp_full_like(result_shape, value_in, result_dtype)
     # Call FPTR function
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array_fill.get_data(), result.get_data(), result.size, NULL)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -294,7 +306,7 @@ cpdef utils.dpnp_descriptor dpnp_identity(n, result_dtype):
     cdef fptr_1out_t func = <fptr_1out_t > kernel_data.ptr
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), n, NULL)
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -464,7 +476,7 @@ cpdef dpnp_ptp(utils.dpnp_descriptor arr, axis=None):
                                                     axis_size,
                                                     NULL)  # dep_events_ref
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -483,15 +495,35 @@ cpdef utils.dpnp_descriptor dpnp_trace(utils.dpnp_descriptor arr, offset=0, axis
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype)
     cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(dtype_)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRACE, param1_type, param2_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRACE_EXT, param1_type, param2_type)
+
+    arr_obj = arr.get_array()
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = diagonal_shape[:-1]
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=arr_obj.sycl_device,
+                                                                       usm_type=arr_obj.usm_type,
+                                                                       sycl_queue=arr_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_dpnp_trace_t func = <fptr_dpnp_trace_t > kernel_data.ptr
 
-    func(diagonal_arr.get_data(), result.get_data(), diagonal_shape.data(), diagonal_ndim)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    diagonal_arr.get_data(),
+                                                    result.get_data(),
+                                                    diagonal_shape.data(),
+                                                    diagonal_ndim,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -505,14 +537,22 @@ cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=numpy.float):
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRI, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRI_EXT, param1_type, param1_type)
 
     cdef shape_type_c shape_in = (N, M)
     cdef utils.dpnp_descriptor result = utils.create_output_descriptor(shape_in, kernel_data.return_type, None)
 
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef custom_indexing_1out_func_ptr_t func = <custom_indexing_1out_func_ptr_t > kernel_data.ptr
 
-    func(result.get_data(), N, M, k)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), N, M, k, NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -555,7 +595,7 @@ cpdef utils.dpnp_descriptor dpnp_tril(utils.dpnp_descriptor m, int k):
                                                     result.ndim,
                                                     NULL)  # dep_events_ref
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -599,7 +639,7 @@ cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k):
                                                     result.ndim,
                                                     NULL)  # dep_events_ref
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -607,14 +647,35 @@ cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k):
 
 cpdef utils.dpnp_descriptor dpnp_vander(utils.dpnp_descriptor x1, int N, int increasing):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_VANDER, param1_type, DPNP_FT_NONE)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_VANDER_EXT, param1_type, DPNP_FT_NONE)
+
+    x1_obj = x1.get_array()  # array behind x1; its device, USM type and queue are reused for the result
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = (x1.size, N)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue  # queue handed to the kernel below
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef ftpr_custom_vander_1in_1out_t func = <ftpr_custom_vander_1in_1out_t > kernel_data.ptr
-    func(x1.get_data(), result.get_data(), x1.size, N, increasing)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    result.get_data(),
+                                                    x1.size,
+                                                    N,
+                                                    increasing,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the kernel's event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # free the event handle returned by func
 
     return result
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx b/dpnp/dpnp_algo/dpnp_algo_indexing.pyx
index 83417a5485be..f920ad0f4007 100644
--- a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_indexing.pyx
@@ -54,46 +54,109 @@ __all__ += [
     "dpnp_triu_indices_from"
 ]
 
-ctypedef void(*fptr_dpnp_choose_t)(void *, void * , void ** , size_t, size_t, size_t)
-ctypedef void(*fptr_dpnp_diag_indices)(void * , size_t)
-ctypedef void(*custom_indexing_2in_1out_func_ptr_t)(void *, const size_t, void * , void * , size_t)
-ctypedef void(*custom_indexing_2in_1out_func_ptr_t_)(void * , const size_t, void * , const size_t, shape_elem_type * ,
-                                                     shape_elem_type *, const size_t)
-ctypedef void(*custom_indexing_2in_func_ptr_t)(void *, void * , shape_elem_type * , const size_t)
-ctypedef void(*custom_indexing_3in_func_ptr_t)(void * , void * , void * , const size_t, const size_t)
-ctypedef void(*custom_indexing_3in_with_axis_func_ptr_t)(void * , void * , void * , const size_t, shape_elem_type * ,
-                                                         const size_t, const size_t, const size_t,)
-ctypedef void(*custom_indexing_6in_func_ptr_t)(void *, void * , void * , const size_t, const size_t, const size_t)
-ctypedef void(*fptr_dpnp_nonzero_t)(const void * , void * , const size_t, const shape_elem_type * , const size_t ,
-                                    const size_t)
-
-
-cpdef utils.dpnp_descriptor dpnp_choose(utils.dpnp_descriptor input, list choices1):
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_choose_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                        void *, void * , void ** , size_t, size_t, size_t,
+                                                        const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_diag_indices)(c_dpctl.DPCTLSyclQueueRef,
+                                                            void * , size_t,
+                                                            const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                         void *,
+                                                                         const size_t,
+                                                                         void * ,
+                                                                         void * ,
+                                                                         size_t,
+                                                                         const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_1out_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef,
+                                                                          void * ,
+                                                                          const size_t,
+                                                                          void * ,
+                                                                          const size_t,
+                                                                          shape_elem_type * ,
+                                                                          shape_elem_type *,
+                                                                          const size_t,
+                                                                          const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                    void *, void * , shape_elem_type * , const size_t,
+                                                                    const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_3in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                    void * ,
+                                                                    void * ,
+                                                                    void * ,
+                                                                    const size_t,
+                                                                    const size_t,
+                                                                    const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_3in_with_axis_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                              void * ,
+                                                                              void * ,
+                                                                              void * ,
+                                                                              const size_t,
+                                                                              shape_elem_type * ,
+                                                                              const size_t,
+                                                                              const size_t,
+                                                                              const size_t,
+                                                                              const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_6in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                    void *,
+                                                                    void * ,
+                                                                    void * ,
+                                                                    const size_t,
+                                                                    const size_t,
+                                                                    const size_t,
+                                                                    const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_nonzero_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                         const void * ,
+                                                         void * ,
+                                                         const size_t,
+                                                         const shape_elem_type * ,
+                                                         const size_t ,
+                                                         const size_t,
+                                                         const c_dpctl.DPCTLEventVectorRef)
+
+
+cpdef utils.dpnp_descriptor dpnp_choose(utils.dpnp_descriptor x1, list choices1):
     cdef vector[void * ] choices
     cdef utils.dpnp_descriptor choice
     for desc in choices1:
         choice = desc
         choices.push_back(choice.get_data())
 
-    cdef shape_type_c input_shape = input.shape
+    cdef shape_type_c x1_shape = x1.shape
     cdef size_t choice_size = choices1[0].size
 
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
     cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(choices1[0].dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOOSE, param1_type, param2_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOOSE_EXT, param1_type, param2_type)
 
-    cdef utils.dpnp_descriptor res_array = utils.create_output_descriptor(input_shape, kernel_data.return_type, None)
+    x1_obj = x1.get_array()  # NOTE(review): device/queue come from x1 only; choices1 is not checked
+
+    cdef utils.dpnp_descriptor res_array = utils.create_output_descriptor(x1_shape,
+                                                                          kernel_data.return_type,
+                                                                          None,
+                                                                          device=x1_obj.sycl_device,
+                                                                          usm_type=x1_obj.usm_type,
+                                                                          sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = res_array.get_array().sycl_queue  # queue handed to the kernel below
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_dpnp_choose_t func = <fptr_dpnp_choose_t > kernel_data.ptr
 
-    func(res_array.get_data(),
-         input.get_data(),
-         choices.data(),
-         input_shape[0],
-         choices.size(),
-         choice_size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    res_array.get_data(),
+                                                    x1.get_data(),
+                                                    choices.data(),
+                                                    x1_shape[0],
+                                                    choices.size(),
+                                                    choice_size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the kernel's event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # free the event handle returned by func
 
     return res_array
 
@@ -103,73 +166,120 @@ cpdef tuple dpnp_diag_indices(n, ndim):
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.int64)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAG_INDICES, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAG_INDICES_EXT, param1_type, param1_type)
 
     cdef fptr_dpnp_diag_indices func = <fptr_dpnp_diag_indices > kernel_data.ptr
 
+    cdef c_dpctl.SyclQueue q
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref
+    cdef c_dpctl.DPCTLSyclEventRef event_ref
+
     res_list = []
     cdef utils.dpnp_descriptor res_arr
     cdef shape_type_c result_shape = utils._object_to_tuple(res_size)
     for i in range(ndim):
         res_arr = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
 
-        func(res_arr.get_data(), res_size)
+        q = <c_dpctl.SyclQueue> res_arr.get_array().sycl_queue
+        q_ref = q.get_queue_ref()
+
+        event_ref = func(q_ref, res_arr.get_data(), res_size, NULL)
+
+        with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+        c_dpctl.DPCTLEvent_Delete(event_ref)
 
         res_list.append(res_arr.get_pyobj())
 
     return tuple(res_list)
 
-cpdef utils.dpnp_descriptor dpnp_diagonal(dpnp_descriptor input, offset=0):
-    cdef shape_type_c input_shape = input.shape
 
-    n = min(input.shape[0], input.shape[1])
-    res_shape = [None] * (input.ndim - 1)
+cpdef utils.dpnp_descriptor dpnp_diagonal(dpnp_descriptor x1, offset=0):
+    cdef shape_type_c x1_shape = x1.shape
+
+    n = min(x1.shape[0], x1.shape[1])  # shorter of the first two axes
+    res_shape = [None] * (x1.ndim - 1)  # result has one dimension fewer than x1
 
-    if input.ndim > 2:
-        for i in range(input.ndim - 2):
-            res_shape[i] = input.shape[i + 2]
+    if x1.ndim > 2:
+        for i in range(x1.ndim - 2):
+            res_shape[i] = x1.shape[i + 2]  # axes after the first two are carried over unchanged
 
-    if (n + offset) > input.shape[1]:
-        res_shape[-1] = input.shape[1] - offset
-    elif (n + offset) > input.shape[0]:
-        res_shape[-1] = input.shape[0]
+    if (n + offset) > x1.shape[1]:
+        res_shape[-1] = x1.shape[1] - offset  # length limited by axis 1 once shifted by offset
+    elif (n + offset) > x1.shape[0]:
+        res_shape[-1] = x1.shape[0]  # length limited by axis 0
     else:
         res_shape[-1] = n + offset
 
     cdef shape_type_c result_shape = res_shape
     res_ndim = len(res_shape)
 
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAGONAL, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DIAGONAL_EXT, param1_type, param1_type)
 
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    x1_obj = x1.get_array()  # array behind x1; its device, USM type and queue are reused for the result
+
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue  # queue handed to the kernel below
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_indexing_2in_1out_func_ptr_t_ func = <custom_indexing_2in_1out_func_ptr_t_ > kernel_data.ptr
 
-    func(input.get_data(),
-         input.size,
-         result.get_data(),
-         offset,
-         input_shape.data(),
-         result_shape.data(),
-         res_ndim)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    x1.size,
+                                                    result.get_data(),
+                                                    offset,
+                                                    x1_shape.data(),
+                                                    result_shape.data(),
+                                                    res_ndim,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the kernel's event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # free the event handle returned by func
 
     return result
 
 
-cpdef dpnp_fill_diagonal(dpnp_descriptor input, val):
-    cdef shape_type_c input_shape = input.shape
-    cdef utils.dpnp_descriptor val_arr = utils_py.create_output_descriptor_py((1,), input.dtype, None)
+cpdef dpnp_fill_diagonal(dpnp_descriptor x1, val):
+    x1_obj = x1.get_array()  # the one-element buffer for val below is allocated alongside this array
+
+    cdef shape_type_c x1_shape = x1.shape
+    cdef utils.dpnp_descriptor val_arr = utils_py.create_output_descriptor_py((1,),
+                                                                              x1.dtype,
+                                                                              None,
+                                                                              device=x1_obj.sycl_device,
+                                                                              usm_type=x1_obj.usm_type,
+                                                                              sycl_queue=x1_obj.sycl_queue)
+
     val_arr.get_pyobj()[0] = val
 
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FILL_DIAGONAL, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FILL_DIAGONAL_EXT, param1_type, param1_type)
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> x1_obj.sycl_queue  # x1 is modified in place, so use its queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_indexing_2in_func_ptr_t func = <custom_indexing_2in_func_ptr_t > kernel_data.ptr
 
-    func(input.get_data(), val_arr.get_data(), input_shape.data(), input.ndim)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    val_arr.get_data(),
+                                                    x1_shape.data(),
+                                                    x1.ndim,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the kernel's event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # free the event handle returned by func
 
 
 cpdef object dpnp_indices(dimensions):
@@ -216,10 +326,14 @@ cpdef tuple dpnp_nonzero(utils.dpnp_descriptor in_array1):
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_NONZERO, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_NONZERO_EXT, param1_type, param1_type)
 
     cdef fptr_dpnp_nonzero_t func = <fptr_dpnp_nonzero_t > kernel_data.ptr
 
+    cdef c_dpctl.SyclQueue q
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref
+    cdef c_dpctl.DPCTLSyclEventRef event_ref
+
     array1_obj = in_array1.get_array()
 
     res_list = []
@@ -234,7 +348,20 @@ cpdef tuple dpnp_nonzero(utils.dpnp_descriptor in_array1):
                                                        usm_type=array1_obj.usm_type,
                                                        sycl_queue=array1_obj.sycl_queue)
 
-        func(in_array1.get_data(), res_arr.get_data(), res_arr.size, shape_arr.data(), in_array1.ndim, j)
+        q = <c_dpctl.SyclQueue> res_arr.get_array().sycl_queue
+        q_ref = q.get_queue_ref()
+
+        event_ref = func(q_ref,
+                         in_array1.get_data(),
+                         res_arr.get_data(),
+                         res_arr.size,
+                         shape_arr.data(),
+                         in_array1.ndim,
+                         j,
+                         NULL)  # dep_events_ref
+
+        with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+        c_dpctl.DPCTLEvent_Delete(event_ref)
 
         res_list.append(res_arr.get_pyobj())
 
@@ -244,7 +371,14 @@ cpdef tuple dpnp_nonzero(utils.dpnp_descriptor in_array1):
 
 
 cpdef dpnp_place(dpnp_descriptor arr, object mask, dpnp_descriptor vals):
-    cdef utils.dpnp_descriptor mask_ = utils_py.create_output_descriptor_py((mask.size,), dpnp.int64, None)
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(arr, vals)
+
+    cdef utils.dpnp_descriptor mask_ = utils_py.create_output_descriptor_py((mask.size,),
+                                                                            dpnp.int64,
+                                                                            None,
+                                                                            device=result_sycl_device,
+                                                                            usm_type=result_usm_type,
+                                                                            sycl_queue=result_sycl_queue)
     for i in range(mask.size):
         if mask.item(i):
             mask_.get_pyobj()[i] = 1
@@ -252,21 +386,40 @@ cpdef dpnp_place(dpnp_descriptor arr, object mask, dpnp_descriptor vals):
             mask_.get_pyobj()[i] = 0
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PLACE, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PLACE_EXT, param1_type, param1_type)
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_indexing_3in_func_ptr_t func = <custom_indexing_3in_func_ptr_t > kernel_data.ptr
 
-    func(arr.get_data(), mask_.get_data(), vals.get_data(), arr.size, vals.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    arr.get_data(),
+                                                    mask_.get_data(),
+                                                    vals.get_data(),
+                                                    arr.size,
+                                                    vals.size,
+                                                    NULL)  # dep_events_ref
 
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
-cpdef dpnp_put(dpnp_descriptor input, object ind, v):
+
+cpdef dpnp_put(dpnp_descriptor x1, object ind, v):
     ind_is_list = isinstance(ind, list)
 
+    x1_obj = x1.get_array()
+
     if dpnp.isscalar(ind):
         ind_size = 1
     else:
         ind_size = len(ind)
-    cdef utils.dpnp_descriptor ind_array = utils_py.create_output_descriptor_py((ind_size,), dpnp.int64, None)
+    cdef utils.dpnp_descriptor ind_array = utils_py.create_output_descriptor_py((ind_size,),
+                                                                                 dpnp.int64,
+                                                                                 None,
+                                                                                 device=x1_obj.sycl_device,
+                                                                                 usm_type=x1_obj.usm_type,
+                                                                                 sycl_queue=x1_obj.sycl_queue)
     if dpnp.isscalar(ind):
         ind_array.get_pyobj()[0] = ind
     else:
@@ -277,32 +430,67 @@ cpdef dpnp_put(dpnp_descriptor input, object ind, v):
         v_size = 1
     else:
         v_size = len(v)
-    cdef utils.dpnp_descriptor v_array = utils_py.create_output_descriptor_py((v_size,), input.dtype, None)
+    cdef utils.dpnp_descriptor v_array = utils_py.create_output_descriptor_py((v_size,),
+                                                                               x1.dtype,
+                                                                               None,
+                                                                               device=x1_obj.sycl_device,
+                                                                               usm_type=x1_obj.usm_type,
+                                                                               sycl_queue=x1_obj.sycl_queue)
     if dpnp.isscalar(v):
         v_array.get_pyobj()[0] = v
     else:
         for i in range(v_size):
             v_array.get_pyobj()[i] = v[i]
 
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PUT, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PUT_EXT, param1_type, param1_type)
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> x1_obj.sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_indexing_6in_func_ptr_t func = <custom_indexing_6in_func_ptr_t > kernel_data.ptr
 
-    func(input.get_data(), ind_array.get_data(), v_array.get_data(), input.size, ind_array.size, v_array.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    ind_array.get_data(),
+                                                    v_array.get_data(),
+                                                    x1.size,
+                                                    ind_array.size,
+                                                    v_array.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
 
 cpdef dpnp_put_along_axis(dpnp_descriptor arr, dpnp_descriptor indices, dpnp_descriptor values, int axis):
     cdef shape_type_c arr_shape = arr.shape
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PUT_ALONG_AXIS, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PUT_ALONG_AXIS_EXT, param1_type, param1_type)
+
+    utils.get_common_usm_allocation(arr, indices)  # check USM allocation is common
+    _, _, result_sycl_queue = utils.get_common_usm_allocation(arr, values)  # only the queue is kept
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_indexing_3in_with_axis_func_ptr_t func = <custom_indexing_3in_with_axis_func_ptr_t > kernel_data.ptr
 
-    func(arr.get_data(), indices.get_data(), values.get_data(),
-         axis, arr_shape.data(), arr.ndim, indices.size, values.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    arr.get_data(),
+                                                    indices.get_data(),
+                                                    values.get_data(),
+                                                    axis,
+                                                    arr_shape.data(),
+                                                    arr.ndim,
+                                                    indices.size,
+                                                    values.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the kernel's event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # free the event handle returned by func
 
 
 cpdef dpnp_putmask(utils.dpnp_descriptor arr, utils.dpnp_descriptor mask, utils.dpnp_descriptor values):
@@ -334,16 +522,37 @@ cpdef utils.dpnp_descriptor dpnp_select(list condlist, list choicelist, default)
     return res_array
 
 
-cpdef utils.dpnp_descriptor dpnp_take(utils.dpnp_descriptor input, utils.dpnp_descriptor indices):
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+cpdef utils.dpnp_descriptor dpnp_take(utils.dpnp_descriptor x1, utils.dpnp_descriptor indices):
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
+
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TAKE_EXT, param1_type, param1_type)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TAKE, param1_type, param1_type)
+    x1_obj = x1.get_array()  # NOTE(review): device/queue come from x1 only; indices is not checked
 
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(indices.shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(indices.shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue  # queue handed to the kernel below
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_indexing_2in_1out_func_ptr_t func = <custom_indexing_2in_1out_func_ptr_t > kernel_data.ptr
 
-    func(input.get_data(), input.size, indices.get_data(), result.get_data(), indices.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    x1.size,
+                                                    indices.get_data(),
+                                                    result.get_data(),
+                                                    indices.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the kernel's event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # free the event handle returned by func
 
     return result
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx
index 33eb55e5dcef..c738cc75b705 100644
--- a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx
@@ -44,14 +44,18 @@ __all__ += [
 
 
 # C function pointer to the C library template functions
-ctypedef void(*fptr_2in_1out_shapes_t)(void *, void * , void * , shape_elem_type * ,
-                                       shape_elem_type *, shape_elem_type * , size_t)
-ctypedef void(*fptr_2in_1out_dot_t)(void * , const size_t, const size_t,
-                                    const shape_elem_type *, const shape_elem_type * ,
-                                    void * , const size_t, const size_t,
-                                    const shape_elem_type *, const shape_elem_type * ,
-                                    void * , const size_t, const size_t,
-                                    const shape_elem_type *, const shape_elem_type * )
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_shapes_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                            void *, void * , void * , shape_elem_type * ,
+                                                            shape_elem_type *, shape_elem_type * , size_t,
+                                                            const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_dot_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                         void * , const size_t, const size_t,
+                                                         const shape_elem_type *, const shape_elem_type * ,
+                                                         void * , const size_t, const size_t,
+                                                         const shape_elem_type *, const shape_elem_type * ,
+                                                         void * , const size_t, const size_t,
+                                                         const shape_elem_type *, const shape_elem_type * ,
+                                                         const c_dpctl.DPCTLEventVectorRef)
 ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_matmul_t)(c_dpctl.DPCTLSyclQueueRef,
                                                             void * , const size_t, const size_t,
                                                             const shape_elem_type *, const shape_elem_type * ,
@@ -73,7 +77,7 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp
     cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(in_array2.dtype)
 
     # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DOT, param1_type, param2_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DOT_EXT, param1_type, param2_type)
 
     ndim1 = in_array1.ndim
     ndim2 = in_array2.ndim
@@ -95,8 +99,15 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp
             shape2 = (shape1[0], 1)
         result_shape = shape1[:-1] + shape2[:-2] + shape2[-1:]
 
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(in_array1, in_array2)
+
     # create result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=result_sycl_device,
+                                                                       usm_type=result_usm_type,
+                                                                       sycl_queue=result_sycl_queue)
 
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result.shape)
     cdef shape_type_c in_array1_shape = in_array1.shape
@@ -104,23 +115,31 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp
     cdef shape_type_c in_array2_shape = in_array2.shape
     cdef shape_type_c in_array2_strides = utils.strides_to_vector(in_array2.strides, in_array2.shape)
 
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
     cdef fptr_2in_1out_dot_t func = <fptr_2in_1out_dot_t > kernel_data.ptr
     # call FPTR function
-    func(result.get_data(),
-         result.size,
-         result.ndim,
-         result_shape.data(),
-         result_strides.data(),
-         in_array1.get_data(),
-         in_array1.size,
-         in_array1.ndim,
-         in_array1_shape.data(),
-         in_array1_strides.data(),
-         in_array2.get_data(),
-         in_array2.size,
-         in_array2.ndim,
-         in_array2_shape.data(),
-         in_array2_strides.data())
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    result.size,
+                                                    result.ndim,
+                                                    result_shape.data(),
+                                                    result_strides.data(),
+                                                    in_array1.get_data(),
+                                                    in_array1.size,
+                                                    in_array1.ndim,
+                                                    in_array1_shape.data(),
+                                                    in_array1_strides.data(),
+                                                    in_array2.get_data(),
+                                                    in_array2.size,
+                                                    in_array2.ndim,
+                                                    in_array2_shape.data(),
+                                                    in_array2_strides.data(),
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -136,8 +155,15 @@ cpdef utils.dpnp_descriptor dpnp_inner(dpnp_descriptor array1, dpnp_descriptor a
     cdef shape_type_c result_shape = array1_no_last_axes
     result_shape.insert(result_shape.end(), array2_no_last_axes.begin(), array2_no_last_axes.end())
 
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(array1, array2)
+
     # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, result_type, None)
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape,
+                                                                             result_type,
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
 
     # calculate input arrays offsets
     cdef shape_type_c array1_offsets = [1] * len(array1.shape)
@@ -218,15 +244,35 @@ cpdef utils.dpnp_descriptor dpnp_kron(dpnp_descriptor in_array1, dpnp_descriptor
     cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(in_array2.dtype)
 
     # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_KRON, param1_type, param2_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_KRON_EXT, param1_type, param2_type)
+
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(in_array1, in_array2)
 
     # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=result_sycl_device,
+                                                                       usm_type=result_usm_type,
+                                                                       sycl_queue=result_sycl_queue)
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_2in_1out_shapes_t func = <fptr_2in_1out_shapes_t > kernel_data.ptr
     # call FPTR function
-    func(in_array1.get_data(), in_array2.get_data(), result.get_data(),
-         in_array1_shape.data(), in_array2_shape.data(), result_shape.data(), ndim)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    in_array1.get_data(),
+                                                    in_array2.get_data(),
+                                                    result.get_data(),
+                                                    in_array1_shape.data(),
+                                                    in_array2_shape.data(),
+                                                    result_shape.data(),
+                                                    ndim,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -324,7 +370,14 @@ cpdef utils.dpnp_descriptor dpnp_outer(utils.dpnp_descriptor array1, utils.dpnp_
     cdef shape_type_c result_shape = (array1.size, array2.size)
     result_type = numpy.promote_types(array1.dtype, array1.dtype)
 
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, result_type, None)
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(array1, array2)
+
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape,
+                                                                             result_type,
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
 
     result_flatiter = result.get_pyobj().flat
     array1_flatiter = array1.get_pyobj().flat
diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pyx
index c2bc4dfcc955..e0b928ddf025 100644
--- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_logic.pyx
@@ -55,20 +55,44 @@ __all__ += [
 ]
 
 
-ctypedef void(*custom_logic_1in_1out_func_ptr_t)(void *, void * , const size_t)
-ctypedef void(*custom_allclose_1in_1out_func_ptr_t)(void * , void * , void * , const size_t, double, double)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_logic_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                      void *, void * , const size_t,
+                                                                      const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_allclose_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                         void * ,
+                                                                         void * ,
+                                                                         void * ,
+                                                                         const size_t,
+                                                                         double,
+                                                                         double,
+                                                                         const c_dpctl.DPCTLEventVectorRef)
 
 
 cpdef utils.dpnp_descriptor dpnp_all(utils.dpnp_descriptor array1):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,), dpnp.bool, None)
+    array1_obj = array1.get_array()
+
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,),
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=array1_obj.sycl_device,
+                                                                             usm_type=array1_obj.usm_type,
+                                                                             sycl_queue=array1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ALL, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ALL_EXT, param1_type, param1_type)
 
     cdef custom_logic_1in_1out_func_ptr_t func = <custom_logic_1in_1out_func_ptr_t > kernel_data.ptr
 
-    func(array1.get_data(), result.get_data(), array1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array1.get_data(), result.get_data(), array1.size, NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -77,48 +101,93 @@ cpdef utils.dpnp_descriptor dpnp_allclose(utils.dpnp_descriptor array1,
                                           utils.dpnp_descriptor array2,
                                           double rtol_val,
                                           double atol_val):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,), dpnp.bool, None)
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(array1, array2)
+
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,),
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)
     cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(array2.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ALLCLOSE, param1_type, param2_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ALLCLOSE_EXT, param1_type, param2_type)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_allclose_1in_1out_func_ptr_t func = <custom_allclose_1in_1out_func_ptr_t > kernel_data.ptr
 
-    func(array1.get_data(), array2.get_data(), result.get_data(), array1.size, rtol_val, atol_val)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    array1.get_data(),
+                                                    array2.get_data(),
+                                                    result.get_data(),
+                                                    array1.size,
+                                                    rtol_val,
+                                                    atol_val,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
 
 cpdef utils.dpnp_descriptor dpnp_any(utils.dpnp_descriptor array1):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,), dpnp.bool, None)
+    array1_obj = array1.get_array()
+
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py((1,),
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=array1_obj.sycl_device,
+                                                                             usm_type=array1_obj.usm_type,
+                                                                             sycl_queue=array1_obj.sycl_queue)
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ANY, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ANY_EXT, param1_type, param1_type)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_logic_1in_1out_func_ptr_t func = <custom_logic_1in_1out_func_ptr_t > kernel_data.ptr
 
-    func(array1.get_data(), result.get_data(), array1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, array1.get_data(), result.get_data(), array1.size, NULL)
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor array1, utils.dpnp_descriptor input2):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(array1.shape,
+cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
                                                                              dpnp.bool,
-                                                                             None)
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
     for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(array1.get_pyobj()[i] == input2.get_pyobj()[i])
+        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] == input2.get_pyobj()[i])
 
     return result
 
 
 cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
     cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
                                                                              dpnp.bool,
-                                                                             None)
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
     for i in range(result.size):
         result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] > input2.get_pyobj()[i])
 
@@ -126,9 +195,13 @@ cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor input1, utils.dpn
 
 
 cpdef utils.dpnp_descriptor dpnp_greater_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
     cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
                                                                              dpnp.bool,
-                                                                             None)
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
     for i in range(result.size):
         result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] >= input2.get_pyobj()[i])
 
@@ -140,7 +213,13 @@ cpdef utils.dpnp_descriptor dpnp_isclose(utils.dpnp_descriptor input1,
                                          double rtol=1e-05,
                                          double atol=1e-08,
                                          cpp_bool equal_nan=False):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None)
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
 
     for i in range(result.size):
         result.get_pyobj()[i] = numpy.isclose(input1.get_pyobj()[i], input2.get_pyobj()[i], rtol, atol, equal_nan)
@@ -149,7 +228,13 @@ cpdef utils.dpnp_descriptor dpnp_isclose(utils.dpnp_descriptor input1,
 
 
 cpdef utils.dpnp_descriptor dpnp_isfinite(utils.dpnp_descriptor input1):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None)
+    input1_obj = input1.get_array()
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=input1_obj.sycl_device,
+                                                                             usm_type=input1_obj.usm_type,
+                                                                             sycl_queue=input1_obj.sycl_queue)
 
     for i in range(result.size):
         result.get_pyobj()[i] = numpy.isfinite(input1.get_pyobj()[i])
@@ -158,7 +243,13 @@ cpdef utils.dpnp_descriptor dpnp_isfinite(utils.dpnp_descriptor input1):
 
 
 cpdef utils.dpnp_descriptor dpnp_isinf(utils.dpnp_descriptor input1):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None)
+    input1_obj = input1.get_array()
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=input1_obj.sycl_device,
+                                                                             usm_type=input1_obj.usm_type,
+                                                                             sycl_queue=input1_obj.sycl_queue)
 
     for i in range(result.size):
         result.get_pyobj()[i] = numpy.isinf(input1.get_pyobj()[i])
@@ -167,7 +258,13 @@ cpdef utils.dpnp_descriptor dpnp_isinf(utils.dpnp_descriptor input1):
 
 
 cpdef utils.dpnp_descriptor dpnp_isnan(utils.dpnp_descriptor input1):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None)
+    input1_obj = input1.get_array()
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=input1_obj.sycl_device,
+                                                                             usm_type=input1_obj.usm_type,
+                                                                             sycl_queue=input1_obj.sycl_queue)
 
     for i in range(result.size):
         result.get_pyobj()[i] = numpy.isnan(input1.get_pyobj()[i])
@@ -176,9 +273,13 @@ cpdef utils.dpnp_descriptor dpnp_isnan(utils.dpnp_descriptor input1):
 
 
 cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
     cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
                                                                              dpnp.bool,
-                                                                             None)
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
     for i in range(result.size):
         result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] < input2.get_pyobj()[i])
 
@@ -186,9 +287,13 @@ cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor input1, utils.dpnp_d
 
 
 cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
     cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
                                                                              dpnp.bool,
-                                                                             None)
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
     for i in range(result.size):
         result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] <= input2.get_pyobj()[i])
 
@@ -196,7 +301,13 @@ cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor input1, utils.
 
 
 cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None)
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
 
     for i in range(result.size):
         result.get_pyobj()[i] = numpy.logical_and(input1.get_pyobj()[i], input2.get_pyobj()[i])
@@ -205,7 +316,13 @@ cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils
 
 
 cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor input1):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None)
+    input1_obj = input1.get_array()
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=input1_obj.sycl_device,
+                                                                             usm_type=input1_obj.usm_type,
+                                                                             sycl_queue=input1_obj.sycl_queue)
 
     for i in range(result.size):
         result.get_pyobj()[i] = numpy.logical_not(input1.get_pyobj()[i])
@@ -214,7 +331,13 @@ cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor input1):
 
 
 cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None)
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
 
     for i in range(result.size):
         result.get_pyobj()[i] = numpy.logical_or(input1.get_pyobj()[i], input2.get_pyobj()[i])
@@ -223,7 +346,13 @@ cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor input1, utils.
 
 
 cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, dpnp.bool, None)
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
+                                                                             dpnp.bool,
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
 
     for i in range(result.size):
         result.get_pyobj()[i] = numpy.logical_xor(input1.get_pyobj()[i], input2.get_pyobj()[i])
@@ -232,9 +361,13 @@ cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils
 
 
 cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
     cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
                                                                              dpnp.bool,
-                                                                             None)
+                                                                             None,
+                                                                             device=result_sycl_device,
+                                                                             usm_type=result_usm_type,
+                                                                             sycl_queue=result_sycl_queue)
     for i in range(result.size):
         result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] != input2.get_pyobj()[i])
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
index f035761f12a2..b2b0c4f9068e 100644
--- a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
@@ -47,9 +47,18 @@ __all__ += [
 
 
 # C function pointer to the C library template functions
-ctypedef void(*fptr_custom_elemwise_transpose_1in_1out_t)(void * , shape_elem_type * , shape_elem_type * ,
-                                                          shape_elem_type * , size_t, void * , size_t)
-ctypedef void(*fptr_dpnp_repeat_t)(const void *, void * , const size_t , const size_t)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_elemwise_transpose_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef,  # new shape: queue ref first, event ref returned (was void)
+                                                                               void * ,
+                                                                               shape_elem_type * ,
+                                                                               shape_elem_type * ,
+                                                                               shape_elem_type * ,
+                                                                               size_t,
+                                                                               void * ,
+                                                                               size_t,
+                                                                               const c_dpctl.DPCTLEventVectorRef)  # trailing event-vector argument; dpnp_transpose in this patch passes NULL
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_repeat_t)(c_dpctl.DPCTLSyclQueueRef,  # same queue-first / event-returning shape
+                                                        const void *, void * , const size_t , const size_t,
+                                                        const c_dpctl.DPCTLEventVectorRef)  # dpnp_repeat in this patch passes NULL here
 
 
 cpdef utils.dpnp_descriptor dpnp_atleast_2d(utils.dpnp_descriptor arr):
@@ -58,7 +67,13 @@ cpdef utils.dpnp_descriptor dpnp_atleast_2d(utils.dpnp_descriptor arr):
     cdef size_t arr_ndim = arr.ndim
     cdef long arr_size = arr.size
     if arr_ndim == 1:
-        result = utils_py.create_output_descriptor_py((1, arr_size), arr.dtype, None)
+        arr_obj = arr.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation below
+        result = utils_py.create_output_descriptor_py((1, arr_size),
+                                                      arr.dtype,
+                                                      None,
+                                                      device=arr_obj.sycl_device,
+                                                      usm_type=arr_obj.usm_type,
+                                                      sycl_queue=arr_obj.sycl_queue)
         for i in range(arr_size):
             result.get_pyobj()[0, i] = arr.get_pyobj()[i]
         return result
@@ -72,13 +87,26 @@ cpdef utils.dpnp_descriptor dpnp_atleast_3d(utils.dpnp_descriptor arr):
     cdef size_t arr_ndim = arr.ndim
     cdef shape_type_c arr_shape = arr.shape
     cdef long arr_size = arr.size
+
+    arr_obj = arr.get_array()  # fetched once; both branches below forward its device/usm_type/queue
+
     if arr_ndim == 1:
-        result = utils_py.create_output_descriptor_py((1, 1, arr_size), arr.dtype, None)
+        result = utils_py.create_output_descriptor_py((1, 1, arr_size),
+                                                      arr.dtype,
+                                                      None,
+                                                      device=arr_obj.sycl_device,
+                                                      usm_type=arr_obj.usm_type,
+                                                      sycl_queue=arr_obj.sycl_queue)
         for i in range(arr_size):
             result.get_pyobj()[0, 0, i] = arr.get_pyobj()[i]
         return result
     elif arr_ndim == 2:
-        result = utils_py.create_output_descriptor_py((1, arr_shape[0], arr_shape[1]), arr.dtype, None)
+        result = utils_py.create_output_descriptor_py((1, arr_shape[0], arr_shape[1]),
+                                                      arr.dtype,
+                                                      None,
+                                                      device=arr_obj.sycl_device,
+                                                      usm_type=arr_obj.usm_type,
+                                                      sycl_queue=arr_obj.sycl_queue)
         for i in range(arr_shape[0]):
             for j in range(arr_shape[1]):
                 result.get_pyobj()[0, i, j] = arr.get_pyobj()[i, j]
@@ -122,7 +150,7 @@ cpdef dpnp_copyto(utils.dpnp_descriptor dst, utils.dpnp_descriptor src, where=Tr
                                                     NULL,
                                                     NULL)  # dep_events_ref
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the returned event with the GIL released; replaces DPCTLEvent_Wait
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
 
@@ -152,22 +180,39 @@ cpdef utils.dpnp_descriptor dpnp_expand_dims(utils.dpnp_descriptor in_array, axi
             shape_list.push_back(in_array.shape[axis_idx])
             axis_idx = axis_idx + 1
 
-    cdef utils.dpnp_descriptor result = dpnp.get_dpnp_descriptor(dpnp.reshape(dpnp_copy(in_array).get_pyobj(), (shape_list)))
-
-    return result
+    return dpnp_reshape(in_array, shape_list)  # NOTE(review): the removed code reshaped dpnp_copy(in_array); this reshapes in_array itself, while dpnp_squeeze in this patch still copies - confirm the difference is intended
 
 
 cpdef utils.dpnp_descriptor dpnp_repeat(utils.dpnp_descriptor array1, repeats, axes=None):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_REPEAT, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_REPEAT_EXT, param1_type, param1_type)  # look up the _EXT entry, which is called with a queue ref below
+
+    array1_obj = array1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = (array1.size * repeats, )
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=array1_obj.sycl_device,
+                                                                       usm_type=array1_obj.usm_type,
+                                                                       sycl_queue=array1_obj.sycl_queue)
+    result_sycl_queue = result.get_array().sycl_queue  # queue is read back from the allocated result
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue  # Cython cast of the Python object to the SyclQueue type
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # queue handle passed as the kernel's first argument
 
     cdef fptr_dpnp_repeat_t func = <fptr_dpnp_repeat_t > kernel_data.ptr
-    func(array1.get_data(), result.get_data(), repeats, array1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    array1.get_data(),
+                                                    result.get_data(),
+                                                    repeats,
+                                                    array1.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the returned event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # dispose of the event handle after the wait returns
 
     return result
 
@@ -175,8 +220,14 @@ cpdef utils.dpnp_descriptor dpnp_repeat(utils.dpnp_descriptor array1, repeats, a
 cpdef utils.dpnp_descriptor dpnp_reshape(utils.dpnp_descriptor array1, newshape, order="C"):
     # return dpnp.get_dpnp_descriptor(dpctl.tensor.usm_ndarray(newshape, dtype=numpy.dtype(array1.dtype).name, buffer=array1.get_pyobj()))
     # return dpnp.get_dpnp_descriptor(dpctl.tensor.reshape(array1.get_pyobj(), newshape))
-    array_obj = dpctl.tensor.reshape(array1.get_array(), newshape, order=order)
-    return dpnp.get_dpnp_descriptor(dpnp_array(array_obj.shape, buffer=array_obj, order=order))
+    array1_obj = array1.get_array()  # kept in a local so its device/usm_type/queue can be forwarded below
+    array_obj = dpctl.tensor.reshape(array1_obj, newshape, order=order)  # reshape is delegated to dpctl.tensor
+    return dpnp.get_dpnp_descriptor(dpnp_array(array_obj.shape,  # wrap the reshaped array as the buffer of a dpnp_array
+                                               buffer=array_obj,
+                                               order=order,
+                                               device=array1_obj.sycl_device,
+                                               usm_type=array1_obj.usm_type,
+                                               sycl_queue=array1_obj.sycl_queue))
 
 
 cpdef utils.dpnp_descriptor dpnp_transpose(utils.dpnp_descriptor array1, axes=None):
@@ -209,15 +260,36 @@ cpdef utils.dpnp_descriptor dpnp_transpose(utils.dpnp_descriptor array1, axes=No
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)
 
     # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRANSPOSE, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRANSPOSE_EXT, param1_type, param1_type)  # look up the _EXT entry, which is called with a queue ref below
+
+    array1_obj = array1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation
 
     # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=array1_obj.sycl_device,
+                                                                       usm_type=array1_obj.usm_type,
+                                                                       sycl_queue=array1_obj.sycl_queue)
+    result_sycl_queue = result.get_array().sycl_queue  # queue is read back from the allocated result
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue  # Cython cast of the Python object to the SyclQueue type
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # queue handle passed as the kernel's first argument
 
     cdef fptr_custom_elemwise_transpose_1in_1out_t func = <fptr_custom_elemwise_transpose_1in_1out_t > kernel_data.ptr
     # call FPTR function
-    func(array1.get_data(), input_shape.data(), result_shape.data(),
-         permute_axes.data(), input_shape_size, result.get_data(), array1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    array1.get_data(),
+                                                    input_shape.data(),
+                                                    result_shape.data(),
+                                                    permute_axes.data(),
+                                                    input_shape_size,
+                                                    result.get_data(),
+                                                    array1.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the returned event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # dispose of the event handle after the wait returns
 
     return result
 
@@ -237,6 +309,6 @@ cpdef utils.dpnp_descriptor dpnp_squeeze(utils.dpnp_descriptor in_array, axis):
             else:
                 shape_list.push_back(in_array.shape[i])
 
-    cdef utils.dpnp_descriptor result = dpnp.get_dpnp_descriptor(dpnp.reshape(dpnp_copy(in_array).get_pyobj(), (shape_list)))
+    # Reshape a copy of in_array (via dpnp_copy) rather than in_array itself.
 
-    return result
+    return dpnp_reshape(dpnp_copy(in_array), shape_list)
diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
index b2c070d95b47..142163054bcd 100644
--- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
@@ -74,28 +74,51 @@ __all__ += [
 ]
 
 
-ctypedef void(*fptr_custom_elemwise_absolute_1in_1out_t)(void * , void * , size_t)
-ctypedef void(*fptr_1in_2out_t)(void * , void * , void * , size_t)
-ctypedef void(*ftpr_custom_trapz_2in_1out_with_2size_t)(void *, void * , void * , double, size_t, size_t)
-ctypedef void(*ftpr_custom_around_1in_1out_t)(const void * , void * , const size_t, const int)
-
-
-cpdef utils.dpnp_descriptor dpnp_absolute(utils.dpnp_descriptor input):
-    cdef shape_type_c input_shape = input.shape
-    cdef size_t input_shape_size = input.ndim
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_elemwise_absolute_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef,  # new shape for all four typedefs: queue ref first, event ref returned (was void)
+                                                                              void * , void * , size_t,
+                                                                              const c_dpctl.DPCTLEventVectorRef)  # trailing event-vector argument; callers in this patch pass NULL
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_1in_2out_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                     void * , void * , void * , size_t,
+                                                     const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*ftpr_custom_trapz_2in_1out_with_2size_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                             void *, void * , void * , double, size_t, size_t,
+                                                                             const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*ftpr_custom_around_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                   const void * , void * , const size_t, const int,
+                                                                   const c_dpctl.DPCTLEventVectorRef)
+
+
+cpdef utils.dpnp_descriptor dpnp_absolute(utils.dpnp_descriptor x1):  # NOTE(review): parameter renamed from `input`; callers passing input= by keyword would need updating - verify
+    cdef shape_type_c x1_shape = x1.shape
+    cdef size_t x1_shape_size = x1.ndim  # NOTE(review): not referenced again within this hunk
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
     # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ABSOLUTE, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ABSOLUTE_EXT, param1_type, param1_type)  # look up the _EXT entry, which is called with a queue ref below
+
+    x1_obj = x1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation
 
     # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(x1_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue  # queue is read back from the allocated result
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue  # Cython cast of the Python object to the SyclQueue type
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # queue handle passed as the kernel's first argument
 
     cdef fptr_custom_elemwise_absolute_1in_1out_t func = <fptr_custom_elemwise_absolute_1in_1out_t > kernel_data.ptr
     # call FPTR function
-    func(input.get_data(), result.get_data(), input.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, NULL)  # last argument (NULL) is dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the returned event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # dispose of the event handle after the wait returns
 
     return result
 
@@ -120,15 +143,30 @@ cpdef utils.dpnp_descriptor dpnp_around(utils.dpnp_descriptor x1, int decimals):
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_AROUND, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_AROUND_EXT, param1_type, param1_type)  # look up the _EXT entry, which is called with a queue ref below
+
+    x1_obj = x1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = x1.shape
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue  # queue is read back from the allocated result
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue  # Cython cast of the Python object to the SyclQueue type
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # queue handle passed as the kernel's first argument
 
     cdef ftpr_custom_around_1in_1out_t func = <ftpr_custom_around_1in_1out_t > kernel_data.ptr
 
-    func(x1.get_data(), result.get_data(), x1.size, decimals)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), result.get_data(), x1.size, decimals, NULL)  # last argument (NULL) is dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the returned event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # dispose of the event handle after the wait returns
 
     return result
 
@@ -184,11 +222,23 @@ cpdef utils.dpnp_descriptor dpnp_cumsum(utils.dpnp_descriptor x1):
 cpdef utils.dpnp_descriptor dpnp_diff(utils.dpnp_descriptor x1, int n):
     cdef utils.dpnp_descriptor res
 
+    x1_obj = x1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to both allocations below
+
     if x1.size - n < 1:
-        res = utils.dpnp_descriptor(dpnp.empty(0, dtype=x1.dtype))
+        res_obj = dpnp_container.empty(0,  # zero-length result, now allocated through dpnp_container instead of dpnp.empty
+                                       dtype=x1.dtype,
+                                       device=x1_obj.sycl_device,
+                                       usm_type=x1_obj.usm_type,
+                                       sycl_queue=x1_obj.sycl_queue)
+        res = utils.dpnp_descriptor(res_obj)
         return res
 
-    res = utils.dpnp_descriptor(dpnp.empty(x1.size - 1, dtype=x1.dtype))
+    res_obj = dpnp_container.empty(x1.size - 1,  # length is x1.size - 1 regardless of n, unchanged from the removed line
+                                   dtype=x1.dtype,
+                                   device=x1_obj.sycl_device,
+                                   usm_type=x1_obj.usm_type,
+                                   sycl_queue=x1_obj.sycl_queue)
+    res = utils.dpnp_descriptor(res_obj)
     for i in range(res.size):
         res.get_pyobj()[i] = x1.get_pyobj()[i + 1] - x1.get_pyobj()[i]
 
@@ -256,7 +306,7 @@ cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1):
                                                     NULL,
                                                     NULL)  # dep_events_ref
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the returned event with the GIL released; replaces DPCTLEvent_Wait
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -290,9 +340,16 @@ cpdef utils.dpnp_descriptor dpnp_gradient(utils.dpnp_descriptor y1, int dx=1):
 
     cdef size_t size = y1.size
 
+    y1_obj = y1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation
+
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = utils._object_to_tuple(size)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, dpnp.float64, None)
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape,
+                                                                             dpnp.float64,
+                                                                             None,
+                                                                             device=y1_obj.sycl_device,
+                                                                             usm_type=y1_obj.usm_type,
+                                                                             sycl_queue=y1_obj.sycl_queue)
 
     cdef double cur = (y1.get_pyobj()[1] - y1.get_pyobj()[0]) / dx
 
@@ -338,16 +395,41 @@ cpdef tuple dpnp_modf(utils.dpnp_descriptor x1):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
     """ get the FPTR data structure """
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MODF, param1_type, DPNP_FT_NONE)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MODF_EXT, param1_type, DPNP_FT_NONE)  # look up the _EXT entry, which is called with a queue ref below
+
+    x1_obj = x1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to both result allocations
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = x1.shape
-    cdef utils.dpnp_descriptor result1 = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
-    cdef utils.dpnp_descriptor result2 = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result1 = utils.create_output_descriptor(result_shape,
+                                                                        kernel_data.return_type,
+                                                                        None,
+                                                                        device=x1_obj.sycl_device,
+                                                                        usm_type=x1_obj.usm_type,
+                                                                        sycl_queue=x1_obj.sycl_queue)
+    cdef utils.dpnp_descriptor result2 = utils.create_output_descriptor(result_shape,
+                                                                        kernel_data.return_type,
+                                                                        None,
+                                                                        device=x1_obj.sycl_device,
+                                                                        usm_type=x1_obj.usm_type,
+                                                                        sycl_queue=x1_obj.sycl_queue)
+
+    _, _, result_sycl_queue = utils.get_common_usm_allocation(result1, result2)  # only the queue element of the returned 3-tuple is kept
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue  # Cython cast of the Python object to the SyclQueue type
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # queue handle passed as the kernel's first argument
 
     cdef fptr_1in_2out_t func = <fptr_1in_2out_t > kernel_data.ptr
     """ Call FPTR function """
-    func(x1.get_data(), result1.get_data(), result2.get_data(), x1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    result1.get_data(),
+                                                    result2.get_data(),
+                                                    x1.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the returned event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # dispose of the event handle after the wait returns
 
     return (result1.get_pyobj(), result2.get_pyobj())
 
@@ -387,7 +469,13 @@ cpdef utils.dpnp_descriptor dpnp_nancumsum(utils.dpnp_descriptor x1):
 
 
 cpdef utils.dpnp_descriptor dpnp_nanprod(utils.dpnp_descriptor x1):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(x1.shape, x1.dtype, None)
+    x1_obj = x1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(x1.shape,
+                                                                             x1.dtype,
+                                                                             None,
+                                                                             device=x1_obj.sycl_device,
+                                                                             usm_type=x1_obj.usm_type,
+                                                                             sycl_queue=x1_obj.sycl_queue)
 
     for i in range(result.size):
         input_elem = x1.get_pyobj().flat[i]
@@ -401,7 +489,13 @@ cpdef utils.dpnp_descriptor dpnp_nanprod(utils.dpnp_descriptor x1):
 
 
 cpdef utils.dpnp_descriptor dpnp_nansum(utils.dpnp_descriptor x1):
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(x1.shape, x1.dtype, None)
+    x1_obj = x1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(x1.shape,
+                                                                             x1.dtype,
+                                                                             None,
+                                                                             device=x1_obj.sycl_device,
+                                                                             usm_type=x1_obj.usm_type,
+                                                                             sycl_queue=x1_obj.sycl_queue)
 
     for i in range(result.size):
         input_elem = x1.get_pyobj().flat[i]
@@ -426,7 +520,7 @@ cpdef utils.dpnp_descriptor dpnp_power(utils.dpnp_descriptor x1_obj,
     return call_fptr_2in_1out_strides(DPNP_FN_POWER_EXT, x1_obj, x2_obj, dtype, out, where, func_name="power")
 
 
-cpdef utils.dpnp_descriptor dpnp_prod(utils.dpnp_descriptor input,
+cpdef utils.dpnp_descriptor dpnp_prod(utils.dpnp_descriptor x1,  # NOTE(review): renamed from `input`; callers passing input= by keyword would need updating - verify
                                       object axis=None,
                                       object dtype=None,
                                       utils.dpnp_descriptor out=None,
@@ -443,24 +537,47 @@ cpdef utils.dpnp_descriptor dpnp_prod(utils.dpnp_descriptor input,
     input:complex128: outout:complex128: name:prod
     """
 
-    cdef shape_type_c input_shape = input.shape
-    cdef DPNPFuncType input_c_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+    cdef shape_type_c x1_shape = x1.shape
+    cdef DPNPFuncType x1_c_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
     cdef shape_type_c axis_shape = utils._object_to_tuple(axis)
 
-    cdef shape_type_c result_shape = utils.get_reduction_output_shape(input_shape, axis, keepdims)
-    cdef DPNPFuncType result_c_type = utils.get_output_c_type(DPNP_FN_PROD, input_c_type, out, dtype)
+    cdef shape_type_c result_shape = utils.get_reduction_output_shape(x1_shape, axis, keepdims)
+    cdef DPNPFuncType result_c_type = utils.get_output_c_type(DPNP_FN_PROD_EXT, x1_c_type, out, dtype)  # output type is now resolved against the _EXT entry
 
     """ select kernel """
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PROD, input_c_type, result_c_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PROD_EXT, x1_c_type, result_c_type)  # look up the _EXT entry, which is called with a queue ref below
+
+    x1_obj = x1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation
 
     """ Create result array """
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, result_c_type, out)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       result_c_type,
+                                                                       out,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
     cdef dpnp_reduction_c_t func = <dpnp_reduction_c_t > kernel_data.ptr
 
+    result_sycl_queue = result.get_array().sycl_queue  # queue is read back from the result descriptor
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue  # Cython cast of the Python object to the SyclQueue type
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # queue handle passed as the kernel's first argument
+
     """ Call FPTR interface function """
-    func(result.get_data(), input.get_data(), input_shape.data(),
-         input_shape.size(), axis_shape.data(), axis_shape.size(), NULL, NULL)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    x1.get_data(),
+                                                    x1_shape.data(),
+                                                    x1_shape.size(),
+                                                    axis_shape.data(),
+                                                    axis_shape.size(),
+                                                    NULL,
+                                                    NULL,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the returned event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # dispose of the event handle after the wait returns
 
     return result
 
@@ -485,7 +602,7 @@ cpdef utils.dpnp_descriptor dpnp_subtract(utils.dpnp_descriptor x1_obj,
     return call_fptr_2in_1out_strides(DPNP_FN_SUBTRACT_EXT, x1_obj, x2_obj, dtype, out, where)
 
 
-cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor input,
+cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor x1,  # NOTE(review): renamed from `input`; callers passing input= by keyword would need updating - verify
                                      object axis=None,
                                      object dtype=None,
                                      utils.dpnp_descriptor out=None,
@@ -493,24 +610,47 @@ cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor input,
                                      object initial=None,
                                      object where=True):
 
-    cdef shape_type_c input_shape = input.shape
-    cdef DPNPFuncType input_c_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+    cdef shape_type_c x1_shape = x1.shape
+    cdef DPNPFuncType x1_c_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
     cdef shape_type_c axis_shape = utils._object_to_tuple(axis)
 
-    cdef shape_type_c result_shape = utils.get_reduction_output_shape(input_shape, axis, keepdims)
-    cdef DPNPFuncType result_c_type = utils.get_output_c_type(DPNP_FN_SUM, input_c_type, out, dtype)
+    cdef shape_type_c result_shape = utils.get_reduction_output_shape(x1_shape, axis, keepdims)
+    cdef DPNPFuncType result_c_type = utils.get_output_c_type(DPNP_FN_SUM_EXT, x1_c_type, out, dtype)  # output type is now resolved against the _EXT entry
 
     """ select kernel """
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SUM, input_c_type, result_c_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SUM_EXT, x1_c_type, result_c_type)  # look up the _EXT entry, which is called with a queue ref below
+
+    x1_obj = x1.get_array()  # its sycl_device/usm_type/sycl_queue are forwarded to the result allocation
 
     """ Create result array """
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, result_c_type, out)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       result_c_type,
+                                                                       out,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue  # queue is read back from the result descriptor
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue  # Cython cast of the Python object to the SyclQueue type
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()  # queue handle passed as the kernel's first argument
 
     """ Call FPTR interface function """
     cdef dpnp_reduction_c_t func = <dpnp_reduction_c_t > kernel_data.ptr
-    func(result.get_data(), input.get_data(), input_shape.data(),
-         input_shape.size(), axis_shape.data(), axis_shape.size(), NULL, NULL)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    x1.get_data(),
+                                                    x1_shape.data(),
+                                                    x1_shape.size(),
+                                                    axis_shape.data(),
+                                                    axis_shape.size(),
+                                                    NULL,
+                                                    NULL,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)  # wait on the returned event with the GIL released
+    c_dpctl.DPCTLEvent_Delete(event_ref)  # dispose of the event handle after the wait returns
 
     return result
 
@@ -519,14 +659,36 @@ cpdef utils.dpnp_descriptor dpnp_trapz(utils.dpnp_descriptor y1, utils.dpnp_desc
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(y1.dtype)
     cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRAPZ, param1_type, param2_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRAPZ_EXT, param1_type, param2_type)
+
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(y1, x1)
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = (1,)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=result_sycl_device,
+                                                                       usm_type=result_usm_type,
+                                                                       sycl_queue=result_sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef ftpr_custom_trapz_2in_1out_with_2size_t func = <ftpr_custom_trapz_2in_1out_with_2size_t > kernel_data.ptr
-    func(y1.get_data(), x1.get_data(), result.get_data(), dx, y1.size, x1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    y1.get_data(),
+                                                    x1.get_data(),
+                                                    result.get_data(),
+                                                    dx,
+                                                    y1.size,
+                                                    x1.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_searching.pyx b/dpnp/dpnp_algo/dpnp_algo_searching.pyx
index 249d1453bea3..59ce8475181a 100644
--- a/dpnp/dpnp_algo/dpnp_algo_searching.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_searching.pyx
@@ -41,22 +41,43 @@ __all__ += [
 
 
 # C function pointer to the C library template functions
-ctypedef void(*custom_search_1in_1out_func_ptr_t)(void * , void * , size_t)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_search_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                       void * , void * , size_t,
+                                                                       const c_dpctl.DPCTLEventVectorRef)
 
 
 cpdef utils.dpnp_descriptor dpnp_argmax(utils.dpnp_descriptor in_array1):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype)
     cdef DPNPFuncType output_type = dpnp_dtype_to_DPNPFuncType(dpnp.int64)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARGMAX, param1_type, output_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARGMAX_EXT, param1_type, output_type)
+
+    in_array1_obj = in_array1.get_array()
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = (1,)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=in_array1_obj.sycl_device,
+                                                                       usm_type=in_array1_obj.usm_type,
+                                                                       sycl_queue=in_array1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_search_1in_1out_func_ptr_t func = <custom_search_1in_1out_func_ptr_t > kernel_data.ptr
 
-    func(in_array1.get_data(), result.get_data(), in_array1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    in_array1.get_data(),
+                                                    result.get_data(),
+                                                    in_array1.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -65,14 +86,33 @@ cpdef utils.dpnp_descriptor dpnp_argmin(utils.dpnp_descriptor in_array1):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype)
     cdef DPNPFuncType output_type = dpnp_dtype_to_DPNPFuncType(dpnp.int64)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARGMIN, param1_type, output_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_ARGMIN_EXT, param1_type, output_type)
+
+    in_array1_obj = in_array1.get_array()
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = (1,)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=in_array1_obj.sycl_device,
+                                                                       usm_type=in_array1_obj.usm_type,
+                                                                       sycl_queue=in_array1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_search_1in_1out_func_ptr_t func = <custom_search_1in_1out_func_ptr_t > kernel_data.ptr
 
-    func(in_array1.get_data(), result.get_data(), in_array1.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    in_array1.get_data(),
+                                                    result.get_data(),
+                                                    in_array1.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
diff --git a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx b/dpnp/dpnp_algo/dpnp_algo_sorting.pyx
index 93a76e81a18b..9a701dd7c905 100644
--- a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_sorting.pyx
@@ -42,8 +42,22 @@ __all__ += [
 ]
 
 
-ctypedef void(*fptr_dpnp_partition_t)(void * , void * , void * , const size_t , const shape_elem_type * , const size_t)
-ctypedef void(*fptr_dpnp_searchsorted_t)(void * , const void * , const void * , bool , const size_t , const size_t )
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_partition_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                           void * ,
+                                                           void * ,
+                                                           void * ,
+                                                           const size_t,
+                                                           const shape_elem_type * ,
+                                                           const size_t,
+                                                           const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_searchsorted_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                              void * ,
+                                                              const void * ,
+                                                              const void * ,
+                                                              bool,
+                                                              const size_t,
+                                                              const size_t,
+                                                              const c_dpctl.DPCTLEventVectorRef)
 
 
 cpdef utils.dpnp_descriptor dpnp_argsort(utils.dpnp_descriptor x1):
@@ -59,15 +73,37 @@ cpdef utils.dpnp_descriptor dpnp_partition(utils.dpnp_descriptor arr, int kth, a
     cdef size_t kth_ = kth if kth >= 0 else (arr.ndim + kth)
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PARTITION, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PARTITION_EXT, param1_type, param1_type)
 
     cdef utils.dpnp_descriptor arr2 = dpnp_copy(arr)
 
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(arr.shape, kernel_data.return_type, None)
+    arr_obj = arr.get_array()
+
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(arr.shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=arr_obj.sycl_device,
+                                                                       usm_type=arr_obj.usm_type,
+                                                                       sycl_queue=arr_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_dpnp_partition_t func = <fptr_dpnp_partition_t > kernel_data.ptr
 
-    func(arr.get_data(), arr2.get_data(), result.get_data(), kth_, shape1.data(), arr.ndim)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    arr.get_data(),
+                                                    arr2.get_data(),
+                                                    result.get_data(),
+                                                    kth_,
+                                                    shape1.data(),
+                                                    arr.ndim,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
@@ -80,13 +116,35 @@ cpdef utils.dpnp_descriptor dpnp_searchsorted(utils.dpnp_descriptor arr, utils.d
 
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SEARCHSORTED, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SEARCHSORTED_EXT, param1_type, param1_type)
+
+    arr_obj = arr.get_array()
+
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(v.shape,
+                                                                             dpnp.int64,
+                                                                             None,
+                                                                             device=arr_obj.sycl_device,
+                                                                             usm_type=arr_obj.usm_type,
+                                                                             sycl_queue=arr_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
 
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(v.shape, dpnp.int64, None)
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_dpnp_searchsorted_t func = <fptr_dpnp_searchsorted_t > kernel_data.ptr
 
-    func(arr.get_data(), v.get_data(), result.get_data(), side_, arr.size, v.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    arr.get_data(),
+                                                    v.get_data(),
+                                                    result.get_data(),
+                                                    side_,
+                                                    arr.size,
+                                                    v.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
index 5ff4c785d96f..872209df371f 100644
--- a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
@@ -49,16 +49,31 @@ __all__ += [
 
 
 # C function pointer to the C library template functions
-ctypedef void(*fptr_custom_cov_1in_1out_t)(void *, void * , size_t, size_t)
-ctypedef void(*fptr_custom_nanvar_t)(void *, void * , void * , size_t, size_t)
-ctypedef void(*fptr_custom_std_var_1in_1out_t)(void *, void * , shape_elem_type * , size_t,
-                                               shape_elem_type * , size_t, size_t)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_cov_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                void *, void * , size_t, size_t,
+                                                                const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_nanvar_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                          void *, void * , void * , size_t, size_t,
+                                                          const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_std_var_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                    void *, void * , shape_elem_type * , size_t,
+                                                                    shape_elem_type * , size_t, size_t,
+                                                                    const c_dpctl.DPCTLEventVectorRef)
 
 # C function pointer to the C library template functions
-ctypedef void(*custom_statistic_1in_1out_func_ptr_t)(void *, void * , shape_elem_type * , size_t,
-                                                     shape_elem_type * , size_t)
-ctypedef void(*custom_statistic_1in_1out_func_ptr_t_max)(void *, void * , const size_t, shape_elem_type * , size_t,
-                                                         shape_elem_type * , size_t)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_statistic_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                                          void *, void * , shape_elem_type * , size_t,
+                                                                          shape_elem_type * , size_t,
+                                                                          const c_dpctl.DPCTLEventVectorRef)
+ctypedef c_dpctl.DPCTLSyclEventRef(*custom_statistic_1in_1out_func_ptr_t_max)(c_dpctl.DPCTLSyclQueueRef,
+                                                                              void *,
+                                                                              void * ,
+                                                                              const size_t,
+                                                                              shape_elem_type * ,
+                                                                              size_t,
+                                                                              shape_elem_type * ,
+                                                                              size_t,
+                                                                              const c_dpctl.DPCTLEventVectorRef)
 
 
 cdef utils.dpnp_descriptor call_fptr_custom_std_var_1in_1out(DPNPFuncName fptr_name, utils.dpnp_descriptor x1, ddof):
@@ -70,9 +85,21 @@ cdef utils.dpnp_descriptor call_fptr_custom_std_var_1in_1out(DPNPFuncName fptr_n
     """ get the FPTR data structure """
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(fptr_name, param_type, DPNP_FT_NONE)
 
+    x1_obj = x1.get_array()
+
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = (1,)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_custom_std_var_1in_1out_t func = <fptr_custom_std_var_1in_1out_t > kernel_data.ptr
 
@@ -81,8 +108,19 @@ cdef utils.dpnp_descriptor call_fptr_custom_std_var_1in_1out(DPNPFuncName fptr_n
     cdef Py_ssize_t axis_size = 0
 
     """ Call FPTR function """
-    func(x1.get_data(), result.get_data(), x1_shape.data(),
-         x1.ndim, axis.data(), axis_size, ddof)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    result.get_data(),
+                                                    x1_shape.data(),
+                                                    x1.ndim,
+                                                    axis.data(),
+                                                    axis_size,
+                                                    ddof,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
+
 
     return result
 
@@ -134,7 +172,7 @@ cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_
                                                     NULL,
                                                     NULL)  # dep_events_ref
 
-    with nogil: c_dpctl.DPCTLEvent_Wait(event_ref)
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
@@ -151,27 +189,59 @@ cpdef utils.dpnp_descriptor dpnp_cov(utils.dpnp_descriptor array1):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)
 
     # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COV, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COV_EXT, param1_type, param1_type)
+
+    array1_obj = array1.get_array()
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = (input_shape[0], input_shape[0])
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=array1_obj.sycl_device,
+                                                                       usm_type=array1_obj.usm_type,
+                                                                       sycl_queue=array1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_custom_cov_1in_1out_t func = <fptr_custom_cov_1in_1out_t > kernel_data.ptr
     # call FPTR function
-    func(array1.get_data(), result.get_data(), input_shape[0], input_shape[1])
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    array1.get_data(),
+                                                    result.get_data(),
+                                                    input_shape[0],
+                                                    input_shape[1],
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
 
-cdef utils.dpnp_descriptor _dpnp_max(utils.dpnp_descriptor input, _axis_, shape_type_c result_shape):
-    cdef shape_type_c input_shape = input.shape
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+cdef utils.dpnp_descriptor _dpnp_max(utils.dpnp_descriptor x1, _axis_, shape_type_c result_shape):
+    cdef shape_type_c x1_shape = x1.shape
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MAX, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MAX_EXT, param1_type, param1_type)
+
+    x1_obj = x1.get_array()
 
     # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_statistic_1in_1out_func_ptr_t_max func = <custom_statistic_1in_1out_func_ptr_t_max > kernel_data.ptr
     cdef shape_type_c axis
@@ -185,19 +255,24 @@ cdef utils.dpnp_descriptor _dpnp_max(utils.dpnp_descriptor input, _axis_, shape_
             axis_.push_back(shape_it)
         axis_size = len(axis)
 
-    func(input.get_data(),
-         result.get_data(),
-         result.size,
-         input_shape.data(),
-         input.ndim,
-         axis_.data(),
-         axis_size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    result.get_data(),
+                                                    result.size,
+                                                    x1_shape.data(),
+                                                    x1.ndim,
+                                                    axis_.data(),
+                                                    axis_size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_max(utils.dpnp_descriptor input, axis):
-    cdef shape_type_c shape_input = input.shape
+cpdef utils.dpnp_descriptor dpnp_max(utils.dpnp_descriptor x1, axis):
+    cdef shape_type_c x1_shape = x1.shape
     cdef shape_type_c output_shape
 
     if axis is None:
@@ -206,35 +281,47 @@ cpdef utils.dpnp_descriptor dpnp_max(utils.dpnp_descriptor input, axis):
     else:
         if isinstance(axis, int):
             if axis < 0:
-                axis_ = tuple([input.ndim - axis])
+                axis_ = tuple([x1.ndim + axis])  # negative axis counts from the end
             else:
                 axis_ = tuple([axis])
         else:
             _axis_ = []
             for i in range(len(axis)):
                 if axis[i] < 0:
-                    _axis_.append(input.ndim - axis[i])
+                    _axis_.append(x1.ndim + axis[i])  # normalize negative axis to [0, ndim)
                 else:
                 else:
                     _axis_.append(axis[i])
             axis_ = tuple(_axis_)
 
-        output_shape.resize(len(shape_input) - len(axis_), 0)
+        output_shape.resize(len(x1_shape) - len(axis_), 0)
         ind = 0
-        for id, shape_axis in enumerate(shape_input):
+        for id, shape_axis in enumerate(x1_shape):
             if id not in axis_:
                 output_shape[ind] = shape_axis
                 ind += 1
 
-    return _dpnp_max(input, axis_, output_shape)
+    return _dpnp_max(x1, axis_, output_shape)
 
 
-cpdef utils.dpnp_descriptor _dpnp_mean(utils.dpnp_descriptor input):
-    cdef shape_type_c input_shape = input.shape
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+cpdef utils.dpnp_descriptor _dpnp_mean(utils.dpnp_descriptor x1):
+    cdef shape_type_c x1_shape = x1.shape
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
+
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MEAN_EXT, param1_type, param1_type)
+
+    x1_obj = x1.get_array()
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MEAN, param1_type, param1_type)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,),
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
 
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), kernel_data.return_type, None)
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_statistic_1in_1out_func_ptr_t func = <custom_statistic_1in_1out_func_ptr_t > kernel_data.ptr
 
@@ -242,31 +329,36 @@ cpdef utils.dpnp_descriptor _dpnp_mean(utils.dpnp_descriptor input):
     cdef shape_type_c axis
     cdef Py_ssize_t axis_size = 0
 
-    func(input.get_data(),
-         result.get_data(),
-         input_shape.data(),
-         input.ndim,
-         axis.data(),
-         axis_size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    result.get_data(),
+                                                    x1_shape.data(),
+                                                    x1.ndim,
+                                                    axis.data(),
+                                                    axis_size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
 
-cpdef object dpnp_mean(utils.dpnp_descriptor input, axis):
+cpdef object dpnp_mean(utils.dpnp_descriptor x1, axis):
     cdef shape_type_c output_shape
 
     if axis is None:
-        return _dpnp_mean(input).get_pyobj()
+        return _dpnp_mean(x1).get_pyobj()
 
-    cdef long size_input = input.size
-    cdef shape_type_c shape_input = input.shape
+    cdef long x1_size = x1.size
+    cdef shape_type_c x1_shape = x1.shape
 
-    if input.dtype == dpnp.float32:
+    if x1.dtype == dpnp.float32:
         res_type = dpnp.float32
     else:
         res_type = dpnp.float64
 
-    if size_input == 0:
+    if x1_size == 0:
         return dpnp.array([dpnp.nan], dtype=res_type)
 
     if isinstance(axis, int):
@@ -277,9 +369,9 @@ cpdef object dpnp_mean(utils.dpnp_descriptor input, axis):
     if axis_ is None:
         output_shape.push_back(1)
     else:
-        output_shape = (0, ) * (len(shape_input) - len(axis_))
+        output_shape = (0, ) * (len(x1_shape) - len(axis_))
         ind = 0
-        for id, shape_axis in enumerate(shape_input):
+        for id, shape_axis in enumerate(x1_shape):
             if id not in axis_:
                 output_shape[ind] = shape_axis
                 ind += 1
@@ -290,15 +382,15 @@ cpdef object dpnp_mean(utils.dpnp_descriptor input, axis):
             prod *= output_shape[i]
 
     result_array = [None] * prod
-    input_shape_offsets = [None] * len(shape_input)
+    input_shape_offsets = [None] * len(x1_shape)
     acc = 1
 
-    for i in range(len(shape_input)):
-        ind = len(shape_input) - 1 - i
+    for i in range(len(x1_shape)):
+        ind = len(x1_shape) - 1 - i
         input_shape_offsets[ind] = acc
-        acc *= shape_input[ind]
+        acc *= x1_shape[ind]
 
-    output_shape_offsets = [None] * len(shape_input)
+    output_shape_offsets = [None] * len(x1_shape)
     acc = 1
 
     if axis_ is not None:
@@ -310,7 +402,7 @@ cpdef object dpnp_mean(utils.dpnp_descriptor input, axis):
         for i in axis_:
             result_offsets[i] = 0
 
-    for source_idx in range(size_input):
+    for source_idx in range(x1_size):
 
         # reconstruct x,y,z from linear source_idx
         xyz = []
@@ -346,11 +438,11 @@ cpdef object dpnp_mean(utils.dpnp_descriptor input, axis):
             else:
                 result_array[result_offset] += input_elem
 
-    del_ = size_input
+    del_ = x1_size
     if axis_ is not None:
-        for i in range(len(shape_input)):
+        for i in range(len(x1_shape)):
             if i not in axis_:
-                del_ = del_ / shape_input[i]
-    dpnp_array = dpnp.array(result_array, dtype=input.dtype)
+                del_ = del_ / x1_shape[i]
+    dpnp_array = dpnp.array(result_array, dtype=x1.dtype)
     dpnp_result_array = dpnp.reshape(dpnp_array, output_shape)
     return dpnp_result_array / del_
@@ -360,9 +452,21 @@ cpdef utils.dpnp_descriptor dpnp_median(utils.dpnp_descriptor array1):
     cdef shape_type_c x1_shape = array1.shape
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MEDIAN, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MEDIAN_EXT, param1_type, param1_type)
+
+    array1_obj = array1.get_array()
+
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,),
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=array1_obj.sycl_device,
+                                                                       usm_type=array1_obj.usm_type,
+                                                                       sycl_queue=array1_obj.sycl_queue)
 
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), kernel_data.return_type, None)
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_statistic_1in_1out_func_ptr_t func = <custom_statistic_1in_1out_func_ptr_t > kernel_data.ptr
 
@@ -370,23 +474,40 @@ cpdef utils.dpnp_descriptor dpnp_median(utils.dpnp_descriptor array1):
     cdef shape_type_c axis
     cdef Py_ssize_t axis_size = 0
 
-    func(array1.get_data(),
-         result.get_data(),
-         x1_shape.data(),
-         array1.ndim,
-         axis.data(),
-         axis_size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    array1.get_data(),
+                                                    result.get_data(),
+                                                    x1_shape.data(),
+                                                    array1.ndim,
+                                                    axis.data(),
+                                                    axis_size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
 
-cpdef utils.dpnp_descriptor _dpnp_min(utils.dpnp_descriptor input, _axis_, shape_type_c shape_output):
-    cdef shape_type_c input_shape = input.shape
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype)
+cpdef utils.dpnp_descriptor _dpnp_min(utils.dpnp_descriptor x1, _axis_, shape_type_c shape_output):
+    cdef shape_type_c x1_shape = x1.shape
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MIN, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MIN_EXT, param1_type, param1_type)
 
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(shape_output, kernel_data.return_type, None)
+    x1_obj = x1.get_array()
+
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(shape_output,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=x1_obj.sycl_device,
+                                                                       usm_type=x1_obj.usm_type,
+                                                                       sycl_queue=x1_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef custom_statistic_1in_1out_func_ptr_t_max func = <custom_statistic_1in_1out_func_ptr_t_max > kernel_data.ptr
     cdef shape_type_c axis
@@ -402,19 +523,24 @@ cpdef utils.dpnp_descriptor _dpnp_min(utils.dpnp_descriptor input, _axis_, shape
             axis_.push_back(shape_it)
         axis_size = len(axis)
 
-    func(input.get_data(),
-         result.get_data(),
-         result.size,
-         input_shape.data(),
-         input.ndim,
-         axis_.data(),
-         axis_size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    x1.get_data(),
+                                                    result.get_data(),
+                                                    result.size,
+                                                    x1_shape.data(),
+                                                    x1.ndim,
+                                                    axis_.data(),
+                                                    axis_size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_min(utils.dpnp_descriptor input, axis):
-    cdef shape_type_c shape_input = input.shape
+cpdef utils.dpnp_descriptor dpnp_min(utils.dpnp_descriptor x1, axis):
+    cdef shape_type_c x1_shape = x1.shape
     cdef shape_type_c shape_output
 
     if axis is None:
@@ -423,23 +549,23 @@ cpdef utils.dpnp_descriptor dpnp_min(utils.dpnp_descriptor input, axis):
     else:
         if isinstance(axis, int):
             if axis < 0:
-                axis_ = tuple([input.ndim - axis])
+                axis_ = tuple([x1.ndim - axis])
             else:
                 axis_ = tuple([axis])
         else:
             _axis_ = []
             for i in range(len(axis)):
                 if axis[i] < 0:
-                    _axis_.append(input.ndim - axis[i])
+                    _axis_.append(x1.ndim - axis[i])
                 else:
                     _axis_.append(axis[i])
             axis_ = tuple(_axis_)
 
-        for id, shape_axis in enumerate(shape_input):
+        for id, shape_axis in enumerate(x1_shape):
             if id not in axis_:
                 shape_output.push_back(shape_axis)
 
-    return _dpnp_min(input, axis_, shape_output)
+    return _dpnp_min(x1, axis_, shape_output)
 
 
 cpdef utils.dpnp_descriptor dpnp_nanvar(utils.dpnp_descriptor arr, ddof):
@@ -449,22 +575,43 @@ cpdef utils.dpnp_descriptor dpnp_nanvar(utils.dpnp_descriptor arr, ddof):
     res_size = int(arr.size - n)
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype)
 
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_NANVAR, param1_type, param1_type)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_NANVAR_EXT, param1_type, param1_type)
+
+    arr_obj = arr.get_array()
 
     # ceate result array with type given by FPTR data
     cdef shape_type_c result_shape = utils._object_to_tuple(res_size)
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None)
+    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
+                                                                       kernel_data.return_type,
+                                                                       None,
+                                                                       device=arr_obj.sycl_device,
+                                                                       usm_type=arr_obj.usm_type,
+                                                                       sycl_queue=arr_obj.sycl_queue)
+
+    result_sycl_queue = result.get_array().sycl_queue
+
+    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     cdef fptr_custom_nanvar_t func = <fptr_custom_nanvar_t > kernel_data.ptr
 
-    func(arr.get_data(), mask_arr.get_data(), result.get_data(), result.size, arr.size)
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    arr.get_data(),
+                                                    mask_arr.get_data(),
+                                                    result.get_data(),
+                                                    result.size,
+                                                    arr.size,
+                                                    NULL)  # dep_events_ref
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
 
-    return call_fptr_custom_std_var_1in_1out(DPNP_FN_VAR, result, ddof)
+    return call_fptr_custom_std_var_1in_1out(DPNP_FN_VAR_EXT, result, ddof)
 
 
 cpdef utils.dpnp_descriptor dpnp_std(utils.dpnp_descriptor a, size_t ddof):
-    return call_fptr_custom_std_var_1in_1out(DPNP_FN_STD, a, ddof)
+    return call_fptr_custom_std_var_1in_1out(DPNP_FN_STD_EXT, a, ddof)
 
 
 cpdef utils.dpnp_descriptor dpnp_var(utils.dpnp_descriptor a, size_t ddof):
-    return call_fptr_custom_std_var_1in_1out(DPNP_FN_VAR, a, ddof)
+    return call_fptr_custom_std_var_1in_1out(DPNP_FN_VAR_EXT, a, ddof)
diff --git a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
index 6edfb87042fb..bf9c4d5e0ed2 100644
--- a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
@@ -171,7 +171,14 @@ cpdef utils.dpnp_descriptor dpnp_unwrap(utils.dpnp_descriptor array1):
     if array1.dtype == dpnp.float32:
         result_type = dpnp.float32
 
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(array1.shape, result_type, None)
+    array1_obj = array1.get_array()
+
+    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(array1.shape,
+                                                                             result_type,
+                                                                             None,
+                                                                             device=array1_obj.sycl_device,
+                                                                             usm_type=array1_obj.usm_type,
+                                                                             sycl_queue=array1_obj.sycl_queue)
 
     for i in range(result.size):
         val, = numpy.unwrap([array1.get_pyobj()[i]])

From 9a93e16a86380230d0f99eded225c07379adfdc8 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Mon, 21 Feb 2022 05:21:06 -0600
Subject: [PATCH 03/18] Add tests for compute follows data for math funcs

---
 .../include/dpnp_gen_1arg_1type_tbl.hpp       |  14 +-
 .../include/dpnp_gen_1arg_2type_tbl.hpp       |  56 ++---
 dpnp/dpnp_algo/dpnp_algo.pyx                  |  14 +-
 dpnp/dpnp_algo/dpnp_algo_mathematical.pyx     |   4 +-
 dpnp/dpnp_iface_arraycreation.py              |   2 +-
 dpnp/dpnp_iface_mathematical.py               | 134 +++++-----
 tests/skipped_tests.tbl                       |  10 +
 tests/skipped_tests_gpu.tbl                   |  10 +
 tests/test_sycl_queue.py                      | 238 +++++++++++++++++-
 9 files changed, 369 insertions(+), 113 deletions(-)

diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
index dd1ca998d5b9..f5ee23d755f2 100644
--- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
@@ -90,20 +90,20 @@
 
 #endif
 
-MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), DPNP_QUEUE.submit(kernel_func))
-MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, DPNP_QUEUE.submit(kernel_func))
+MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), q.submit(kernel_func))
+MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, q.submit(kernel_func))
 MACRO_1ARG_1TYPE_OP(dpnp_erf_c,
                     sycl::erf((double)input_elem),
-                    oneapi::mkl::vm::erf(DPNP_QUEUE, input1_size, input1_data, result)) // no sycl::erf for int and long
-MACRO_1ARG_1TYPE_OP(dpnp_negative_c, -input_elem, DPNP_QUEUE.submit(kernel_func))
+                    oneapi::mkl::vm::erf(q, input1_size, input1_data, result)) // no sycl::erf for int and long
+MACRO_1ARG_1TYPE_OP(dpnp_negative_c, -input_elem, q.submit(kernel_func))
 MACRO_1ARG_1TYPE_OP(dpnp_recip_c,
                     _DataType(1) / input_elem,
-                    DPNP_QUEUE.submit(kernel_func)) // error: no member named 'recip' in namespace 'sycl'
+                    q.submit(kernel_func)) // error: no member named 'recip' in namespace 'sycl'
 MACRO_1ARG_1TYPE_OP(dpnp_sign_c,
                     sycl::sign((double)input_elem),
-                    DPNP_QUEUE.submit(kernel_func)) // no sycl::sign for int and long
+                    q.submit(kernel_func)) // no sycl::sign for int and long
 MACRO_1ARG_1TYPE_OP(dpnp_square_c,
                     input_elem* input_elem,
-                    oneapi::mkl::vm::sqr(DPNP_QUEUE, input1_size, input1_data, result))
+                    oneapi::mkl::vm::sqr(q, input1_size, input1_data, result))
 
 #undef MACRO_1ARG_1TYPE_OP
diff --git a/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp
index bfd375b807e1..71caeef9111a 100644
--- a/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp
@@ -92,81 +92,81 @@
 
 MACRO_1ARG_2TYPES_OP(dpnp_acos_c,
                      sycl::acos(input_elem),
-                     oneapi::mkl::vm::acos(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::acos(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_acosh_c,
                      sycl::acosh(input_elem),
-                     oneapi::mkl::vm::acosh(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::acosh(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_asin_c,
                      sycl::asin(input_elem),
-                     oneapi::mkl::vm::asin(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::asin(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_asinh_c,
                      sycl::asinh(input_elem),
-                     oneapi::mkl::vm::asinh(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::asinh(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_atan_c,
                      sycl::atan(input_elem),
-                     oneapi::mkl::vm::atan(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::atan(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_atanh_c,
                      sycl::atanh(input_elem),
-                     oneapi::mkl::vm::atanh(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::atanh(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_cbrt_c,
                      sycl::cbrt(input_elem),
-                     oneapi::mkl::vm::cbrt(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::cbrt(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_ceil_c,
                      sycl::ceil(input_elem),
-                     oneapi::mkl::vm::ceil(DPNP_QUEUE, input1_size, input1_data, result))
-MACRO_1ARG_2TYPES_OP(dpnp_copyto_c, input_elem, DPNP_QUEUE.submit(kernel_func))
+                     oneapi::mkl::vm::ceil(q, input1_size, input1_data, result))
+MACRO_1ARG_2TYPES_OP(dpnp_copyto_c, input_elem, q.submit(kernel_func))
 MACRO_1ARG_2TYPES_OP(dpnp_cos_c,
                      sycl::cos(input_elem),
-                     oneapi::mkl::vm::cos(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::cos(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_cosh_c,
                      sycl::cosh(input_elem),
-                     oneapi::mkl::vm::cosh(DPNP_QUEUE, input1_size, input1_data, result))
-MACRO_1ARG_2TYPES_OP(dpnp_degrees_c, sycl::degrees(input_elem), DPNP_QUEUE.submit(kernel_func))
+                     oneapi::mkl::vm::cosh(q, input1_size, input1_data, result))
+MACRO_1ARG_2TYPES_OP(dpnp_degrees_c, sycl::degrees(input_elem), q.submit(kernel_func))
 MACRO_1ARG_2TYPES_OP(dpnp_exp2_c,
                      sycl::exp2(input_elem),
-                     oneapi::mkl::vm::exp2(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::exp2(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_exp_c,
                      sycl::exp(input_elem),
-                     oneapi::mkl::vm::exp(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::exp(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_expm1_c,
                      sycl::expm1(input_elem),
-                     oneapi::mkl::vm::expm1(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::expm1(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_fabs_c,
                      sycl::fabs(input_elem),
-                     oneapi::mkl::vm::abs(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::abs(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_floor_c,
                      sycl::floor(input_elem),
-                     oneapi::mkl::vm::floor(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::floor(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_log10_c,
                      sycl::log10(input_elem),
-                     oneapi::mkl::vm::log10(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::log10(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_log1p_c,
                      sycl::log1p(input_elem),
-                     oneapi::mkl::vm::log1p(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::log1p(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_log2_c,
                      sycl::log2(input_elem),
-                     oneapi::mkl::vm::log2(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::log2(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_log_c,
                      sycl::log(input_elem),
-                     oneapi::mkl::vm::ln(DPNP_QUEUE, input1_size, input1_data, result))
-MACRO_1ARG_2TYPES_OP(dpnp_radians_c, sycl::radians(input_elem), DPNP_QUEUE.submit(kernel_func))
+                     oneapi::mkl::vm::ln(q, input1_size, input1_data, result))
+MACRO_1ARG_2TYPES_OP(dpnp_radians_c, sycl::radians(input_elem), q.submit(kernel_func))
 MACRO_1ARG_2TYPES_OP(dpnp_sin_c,
                      sycl::sin(input_elem),
-                     oneapi::mkl::vm::sin(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::sin(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_sinh_c,
                      sycl::sinh(input_elem),
-                     oneapi::mkl::vm::sinh(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::sinh(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_sqrt_c,
                      sycl::sqrt(input_elem),
-                     oneapi::mkl::vm::sqrt(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::sqrt(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_tan_c,
                      sycl::tan(input_elem),
-                     oneapi::mkl::vm::tan(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::tan(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_tanh_c,
                      sycl::tanh(input_elem),
-                     oneapi::mkl::vm::tanh(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::tanh(q, input1_size, input1_data, result))
 MACRO_1ARG_2TYPES_OP(dpnp_trunc_c,
                      sycl::trunc(input_elem),
-                     oneapi::mkl::vm::trunc(DPNP_QUEUE, input1_size, input1_data, result))
+                     oneapi::mkl::vm::trunc(q, input1_size, input1_data, result))
 
 #undef MACRO_1ARG_2TYPES_OP
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 5f30dbffbd95..18ab2041d00e 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -379,6 +379,8 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out(DPNPFuncName fptr_name,
 
         result = out
 
+        utils.get_common_usm_allocation(x1, result)  # check USM allocation is common
+
     result_sycl_queue = result.get_array().sycl_queue
 
     cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
@@ -432,6 +434,8 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name,
 
         result = out
 
+        utils.get_common_usm_allocation(x1, result)  # check USM allocation is common
+
     result_sycl_queue = result.get_array().sycl_queue
 
     cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
@@ -484,9 +488,10 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out(DPNPFuncName fptr_name,
     cdef shape_type_c result_shape = utils.get_common_shape(x1_shape, x2_shape)
     cdef utils.dpnp_descriptor result
 
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj)
+
     if out is None:
         """ Create result array with type given by FPTR data """
-        result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj)
         result = utils.create_output_descriptor(result_shape,
                                                 kernel_data.return_type,
                                                 None,
@@ -501,6 +506,8 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out(DPNPFuncName fptr_name,
 
         result = out
 
+        utils.get_common_usm_allocation(x1_obj, result)  # check USM allocation is common
+
     cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
     cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
@@ -551,9 +558,10 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
     cdef shape_type_c result_shape = utils.get_common_shape(x1_shape, x2_shape)
     cdef utils.dpnp_descriptor result
 
+    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj)
+
     if out is None:
         """ Create result array with type given by FPTR data """
-        result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj)
         result = utils.create_output_descriptor(result_shape,
                                                 kernel_data.return_type,
                                                 None,
@@ -568,6 +576,8 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
 
         result = out
 
+        utils.get_common_usm_allocation(x1_obj, result)  # check USM allocation is common
+
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
 
     result_obj = result.get_array()
diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
index 142163054bcd..5d937a7008bb 100644
--- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
@@ -451,7 +451,7 @@ cpdef utils.dpnp_descriptor dpnp_nancumprod(utils.dpnp_descriptor x1):
         if dpnp.isnan(cur_x1_flatiter[i]):
             cur_x1_flatiter[i] = 1
 
-    x1_desc = dpnp.get_dpnp_descriptor(cur_x1)
+    x1_desc = dpnp.get_dpnp_descriptor(cur_x1, copy_when_nondefault_queue=False)
     return dpnp_cumprod(x1_desc)
 
 
@@ -464,7 +464,7 @@ cpdef utils.dpnp_descriptor dpnp_nancumsum(utils.dpnp_descriptor x1):
         if dpnp.isnan(cur_x1_flatiter[i]):
             cur_x1_flatiter[i] = 0
 
-    x1_desc = dpnp.get_dpnp_descriptor(cur_x1)
+    x1_desc = dpnp.get_dpnp_descriptor(cur_x1, copy_when_nondefault_queue=False)
     return dpnp_cumsum(x1_desc)
 
 
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 65891b1c1ec2..cf0d5087d702 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -390,7 +390,7 @@ def copy(x1, order='K', subok=False):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         if order != 'K':
             pass
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 5a9ad98d3167..d4515b43cd3a 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -142,7 +142,7 @@ def absolute(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         if not x1_desc.ndim:
             pass
@@ -181,8 +181,8 @@ def add(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -202,7 +202,7 @@ def add(x1, x2, dtype=None, out=None, where=True, **kwargs):
         elif not where:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             return dpnp_add(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
 
     return call_origin(numpy.add, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
@@ -237,7 +237,7 @@ def around(x1, decimals=0, out=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if out is not None:
             pass
@@ -277,9 +277,9 @@ def ceil(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_ceil(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.ceil, x1, out=out, **kwargs)
@@ -307,7 +307,7 @@ def conjugate(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_conjugate(x1_desc).get_pyobj()
 
@@ -359,8 +359,8 @@ def copysign(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -411,8 +411,8 @@ def cross(x1, x2, axisa=-1, axisb=-1, axisc=-1, axis=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc:
         if x1_desc.size != 3 or x2_desc.size != 3:
@@ -460,7 +460,7 @@ def cumprod(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_cumprod(x1_desc).get_pyobj()
 
@@ -494,7 +494,7 @@ def cumsum(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_cumsum(x1_desc).get_pyobj()
 
@@ -514,7 +514,7 @@ def diff(x1, n=1, axis=-1, prepend=numpy._NoValue, append=numpy._NoValue):
     Otherwise the function will be executed sequentially on CPU.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if not isinstance(n, int):
             pass
@@ -524,9 +524,9 @@ def diff(x1, n=1, axis=-1, prepend=numpy._NoValue, append=numpy._NoValue):
             pass
         elif axis != -1:
             pass
-        elif prepend is not None:
+        elif prepend is not numpy._NoValue:
             pass
-        elif append is not None:
+        elif append is not numpy._NoValue:
             pass
         else:
             return dpnp_diff(x1_desc, n).get_pyobj()
@@ -559,8 +559,8 @@ def divide(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -614,7 +614,7 @@ def ediff1d(x1, to_end=None, to_begin=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if to_begin is not None:
             pass
@@ -650,7 +650,7 @@ def fabs(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_fabs(x1_desc).get_pyobj()
 
@@ -690,9 +690,9 @@ def floor(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_floor(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.floor, x1, out=out, **kwargs)
@@ -730,8 +730,8 @@ def floor_divide(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -747,6 +747,7 @@ def floor_divide(x1, x2, dtype=None, out=None, where=True, **kwargs):
         elif x2_is_scalar and not x2_desc:
             pass
         elif x1_desc and x2_desc and x1_desc.size != x2_desc.size:
+            # TODO: enable broadcasting
             pass
         elif x1_desc and x2_desc and x1_desc.shape != x2_desc.shape:
             pass
@@ -759,7 +760,7 @@ def floor_divide(x1, x2, dtype=None, out=None, where=True, **kwargs):
         elif x1_is_scalar and x2_desc.ndim > 1:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             return dpnp_floor_divide(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
 
     return call_origin(numpy.floor_divide, x1, x2, out=out, where=where, dtype=dtype, **kwargs)
@@ -839,8 +840,8 @@ def fmod(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -860,7 +861,7 @@ def fmod(x1, x2, dtype=None, out=None, where=True, **kwargs):
         elif not where:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             return dpnp_fmod(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
 
     return call_origin(numpy.fmod, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
@@ -893,7 +894,7 @@ def gradient(x1, *varargs, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         if len(varargs) > 1:
             pass
@@ -939,8 +940,8 @@ def maximum(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -996,8 +997,8 @@ def minimum(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -1067,7 +1068,7 @@ def modf(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_modf(x1_desc)
 
@@ -1100,8 +1101,8 @@ def multiply(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x2_desc and not x2_is_scalar:
@@ -1154,7 +1155,7 @@ def nancumprod(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_nancumprod(x1_desc).get_pyobj()
 
@@ -1190,7 +1191,7 @@ def nancumsum(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_nancumsum(x1_desc).get_pyobj()
 
@@ -1220,7 +1221,7 @@ def nanprod(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_nanprod(x1_desc).get_pyobj()
 
@@ -1250,7 +1251,7 @@ def nansum(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         result_obj = dpnp_nansum(x1_desc).get_pyobj()
         result = dpnp.convert_single_elem_array_to_scalar(result_obj)
@@ -1283,7 +1284,7 @@ def negative(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_negative(x1_desc).get_pyobj()
 
@@ -1324,8 +1325,8 @@ def power(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -1343,7 +1344,7 @@ def power(x1, x2, dtype=None, out=None, where=True, **kwargs):
         elif not where:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             return dpnp_power(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
 
     return call_origin(numpy.power, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
@@ -1370,12 +1371,12 @@ def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, wher
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if where is not True:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             result_obj = dpnp_prod(x1_desc, axis, dtype, out_desc, keepdims, initial, where).get_pyobj()
             result = dpnp.convert_single_elem_array_to_scalar(result_obj, keepdims)
 
@@ -1416,8 +1417,8 @@ def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -1433,6 +1434,7 @@ def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs):
         elif x2_is_scalar and not x2_desc:
             pass
         elif x1_desc and x2_desc and x1_desc.size != x2_desc.size:
+            # TODO: enable broadcasting
             pass
         elif x1_desc and x2_desc and x1_desc.shape != x2_desc.shape:
             pass
@@ -1445,7 +1447,7 @@ def remainder(x1, x2, out=None, where=True, dtype=None, **kwargs):
         elif x1_is_scalar and x2_desc.ndim > 1:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             return dpnp_remainder(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
 
     return call_origin(numpy.remainder, x1, x2, out=out, where=where, dtype=dtype, **kwargs)
@@ -1488,7 +1490,7 @@ def sign(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_sign(x1_desc).get_pyobj()
 
@@ -1520,8 +1522,8 @@ def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -1545,7 +1547,7 @@ def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs):
         elif not where:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
 
     return call_origin(numpy.subtract, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
@@ -1572,12 +1574,12 @@ def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if where is not True:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             result_obj = dpnp_sum(x1_desc, axis, dtype, out_desc, keepdims, initial, where).get_pyobj()
             result = dpnp.convert_single_elem_array_to_scalar(result_obj, keepdims)
 
@@ -1613,13 +1615,23 @@ def trapz(y1, x1=None, dx=1.0, axis=-1):
 
     """
 
-    y_desc = dpnp.get_dpnp_descriptor(y1)
+    y_desc = dpnp.get_dpnp_descriptor(y1, copy_when_nondefault_queue=False)
     if y_desc:
         if y_desc.ndim > 1:
             pass
         else:
-            x_obj = dpnp.empty(y_desc.shape, dtype=y_desc.dtype) if x1 is None else x1
-            x_desc = dpnp.get_dpnp_descriptor(x_obj)
+            y_obj = y_desc.get_array()
+            if x1 is None:
+                x_obj = dpnp.empty(y_desc.shape,
+                                   dtype=y_desc.dtype,
+                                   device=y_obj.sycl_device,
+                                   usm_type=y_obj.usm_type,
+                                   sycl_queue=y_obj.sycl_queue)
+            else:
+                x_obj = x1
+
+            x_desc = dpnp.get_dpnp_descriptor(x_obj, copy_when_nondefault_queue=False)
+            # TODO: change to "not x_desc"
             if x_desc:
                 pass
             elif y_desc.size != x_desc.size:
@@ -1680,9 +1692,9 @@ def trunc(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_trunc(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.trunc, x1, out=out, **kwargs)
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index f1f078896dd2..5692e09e014b 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -1,3 +1,13 @@
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
+tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25]
+tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22]
+tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-remainder-data15-data25]
+
 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_fft2
 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_ifft2
 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fft2
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 29fd403f750b..833fc84c6b25 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,3 +1,13 @@
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
+tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25]
+tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22]
+tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-remainder-data15-data25]
+
 tests/test_indexing.py::test_nonzero[[[1, 0], [1, 0]]]
 tests/test_indexing.py::test_nonzero[[[1, 2], [3, 4]]]
 tests/test_indexing.py::test_nonzero[[[0, 1, 2], [3, 0, 5], [6, 7, 0]]]
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 66fbfb786f89..00844f048db9 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -34,20 +34,127 @@ def assert_sycl_queue_equal(result, expected):
     assert exec_queue is not None
 
 
+@pytest.mark.parametrize(
+    "func,data",
+    [
+        pytest.param("abs",
+                     [-1.2, 1.2]),
+        pytest.param("ceil",
+                     [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
+        pytest.param("conjugate",
+                     [[1.+1.j, 0.], [0., 1.+1.j]]),
+        pytest.param("copy",
+                     [1., 2., 3.]),
+        pytest.param("cumprod",
+                     [[1., 2., 3.], [4., 5., 6.]]),
+        pytest.param("cumsum",
+                     [[1., 2., 3.], [4., 5., 6.]]),
+        pytest.param("diff",
+                     [1., 2., 4., 7., 0.]),
+        pytest.param("ediff1d",
+                     [1., 2., 4., 7., 0.]),
+        pytest.param("fabs",
+                     [-1.2, 1.2]),
+        pytest.param("floor",
+                     [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
+        pytest.param("gradient",
+                     [1., 2., 4., 7., 11., 16.]),
+        pytest.param("nancumprod",
+                     [1., dpnp.nan]),
+        pytest.param("nancumsum",
+                     [1., dpnp.nan]),
+        pytest.param("nanprod",
+                     [1., dpnp.nan]),
+        pytest.param("nansum",
+                     [1., dpnp.nan]),
+        pytest.param("negative",
+                     [1., -1.]),
+        pytest.param("prod",
+                     [1., 2.]),
+        pytest.param("sign",
+                     [-5., 4.5]),
+        pytest.param("sum",
+                     [1., 2.]),
+        pytest.param("trapz",
+                     [[0., 1., 2.], [3., 4., 5.]]),
+        pytest.param("trunc",
+                     [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
+    ],
+)
 @pytest.mark.parametrize("device",
                          valid_devices,
                          ids=[device.filter_string for device in valid_devices])
-def test_matmul(device):
-    data1 = [[1., 1., 1.], [1., 1., 1.]]
-    data2 = [[1., 1.], [1., 1.], [1., 1.]]
+def test_1in_1out(func, data, device):
+    x_orig = numpy.array(data)
+    expected = getattr(numpy, func)(x_orig)
 
+    x = dpnp.array(data, device=device)
+    result = getattr(dpnp, func)(x)
+
+    numpy.testing.assert_array_equal(result, expected)
+
+    expected_queue = x.get_array().sycl_queue
+    result_queue = result.get_array().sycl_queue
+
+    assert_sycl_queue_equal(result_queue, expected_queue)
+    assert result_queue.sycl_device == expected_queue.sycl_device
+
+
+@pytest.mark.parametrize(
+    "func,data1,data2",
+    [
+        pytest.param("add",
+                     [0., 1., 2., 3., 4., 5., 6., 7., 8.],
+                     [0., 1., 2., 0., 1., 2., 0., 1., 2.]),
+        pytest.param("copysign",
+                     [0., 1., 2.],
+                     [-1., 0., 1.]),
+        pytest.param("cross",
+                     [1., 2., 3.],
+                     [4., 5., 6.]),
+        pytest.param("divide",
+                     [0., 1., 2., 3., 4.],
+                     [4., 4., 4., 4., 4.]),
+        pytest.param("floor_divide",
+                     [1., 2., 3., 4.],
+                     [2.5, 2.5, 2.5, 2.5]),
+        pytest.param("fmod",
+                     [-3., -2., -1., 1., 2., 3.],
+                     [2., 2., 2., 2., 2., 2.]),
+        pytest.param("maximum",
+                     [2., 3., 4.],
+                     [1., 5., 2.]),
+        pytest.param("minimum",
+                     [2., 3., 4.],
+                     [1., 5., 2.]),
+        pytest.param("multiply",
+                     [0., 1., 2., 3., 4., 5., 6., 7., 8.],
+                     [0., 1., 2., 0., 1., 2., 0., 1., 2.]),
+        pytest.param("power",
+                     [0., 1., 2., 3., 4., 5.],
+                     [1., 2., 3., 3., 2., 1.]),
+        pytest.param("remainder",
+                     [0., 1., 2., 3., 4., 5., 6.],
+                     [5., 5., 5., 5., 5., 5., 5.]),
+        pytest.param("subtract",
+                     [0., 1., 2., 3., 4., 5., 6., 7., 8.],
+                     [0., 1., 2., 0., 1., 2., 0., 1., 2.]),
+        pytest.param("matmul",
+                     [[1., 0.], [0., 1.]],
+                     [[4., 1.], [1., 2.]]),
+    ],
+)
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_2in_1out(func, data1, data2, device):
     x1_orig = numpy.array(data1)
     x2_orig = numpy.array(data2)
-    expected = numpy.matmul(x1_orig, x2_orig)
+    expected = getattr(numpy, func)(x1_orig, x2_orig)
 
     x1 = dpnp.array(data1, device=device)
     x2 = dpnp.array(data2, device=device)
-    result = dpnp.matmul(x1, x2)
+    result = getattr(dpnp, func)(x1, x2)
 
     numpy.testing.assert_array_equal(result, expected)
 
@@ -58,16 +165,36 @@ def test_matmul(device):
     assert result_queue.sycl_device == expected_queue.sycl_device
 
 
-
-@pytest.mark.parametrize("func",
-                         [])
+@pytest.mark.parametrize(
+    "func,data1,data2",
+    [
+        pytest.param("add",
+                     [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]],
+                     [0., 1., 2.]),
+        pytest.param("divide",
+                     [0., 1., 2., 3., 4.],
+                     [4.]),
+        pytest.param("floor_divide",
+                     [1., 2., 3., 4.],
+                     [2.5]),
+        pytest.param("fmod",
+                     [-3., -2., -1., 1., 2., 3.],
+                     [2.]),
+        pytest.param("multiply",
+                     [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]],
+                     [0., 1., 2.]),
+        pytest.param("remainder",
+                     [0., 1., 2., 3., 4., 5., 6.],
+                     [5.]),
+        pytest.param("subtract",
+                     [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]],
+                     [0., 1., 2.]),
+    ],
+)
 @pytest.mark.parametrize("device",
                          valid_devices,
                          ids=[device.filter_string for device in valid_devices])
-def test_2in_1out(func, device):
-    data1 = [1., 1., 1., 1., 1.]
-    data2 = [1., 2., 3., 4., 5.]
-
+def test_broadcasting(func, data1, data2, device):
     x1_orig = numpy.array(data1)
     x2_orig = numpy.array(data2)
     expected = getattr(numpy, func)(x1_orig, x2_orig)
@@ -83,3 +210,90 @@ def test_2in_1out(func, device):
 
     assert_sycl_queue_equal(result_queue, expected_queue)
     assert result_queue.sycl_device == expected_queue.sycl_device
+
+
+@pytest.mark.parametrize(
+    "func,data1,data2",
+    [
+        pytest.param("add",
+                     [0., 1., 2., 3., 4., 5., 6., 7., 8.],
+                     [0., 1., 2., 0., 1., 2., 0., 1., 2.]),
+        pytest.param("copysign",
+                     [0., 1., 2.],
+                     [-1., 0., 1.]),
+        pytest.param("divide",
+                     [0., 1., 2., 3., 4.],
+                     [4., 4., 4., 4., 4.]),
+        pytest.param("floor_divide",
+                     [1., 2., 3., 4.],
+                     [2.5, 2.5, 2.5, 2.5]),
+        pytest.param("fmod",
+                     [-3., -2., -1., 1., 2., 3.],
+                     [2., 2., 2., 2., 2., 2.]),
+        pytest.param("maximum",
+                     [2., 3., 4.],
+                     [1., 5., 2.]),
+        pytest.param("minimum",
+                     [2., 3., 4.],
+                     [1., 5., 2.]),
+        pytest.param("multiply",
+                     [0., 1., 2., 3., 4., 5., 6., 7., 8.],
+                     [0., 1., 2., 0., 1., 2., 0., 1., 2.]),
+        pytest.param("power",
+                     [0., 1., 2., 3., 4., 5.],
+                     [1., 2., 3., 3., 2., 1.]),
+        pytest.param("remainder",
+                     [0., 1., 2., 3., 4., 5., 6.],
+                     [5., 5., 5., 5., 5., 5., 5.]),
+        pytest.param("subtract",
+                     [0., 1., 2., 3., 4., 5., 6., 7., 8.],
+                     [0., 1., 2., 0., 1., 2., 0., 1., 2.]),
+    ],
+)
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_out(func, data1, data2, device):
+    """Run each parametrized func with out=; values must match numpy and stay on x1's queue."""
+    x1_orig = numpy.array(data1)
+    x2_orig = numpy.array(data2)
+    expected = numpy.empty(x1_orig.size)
+    getattr(numpy, func)(x1_orig, x2_orig, out=expected)
+
+    x1 = dpnp.array(data1, device=device)
+    x2 = dpnp.array(data2, device=device)
+    result = dpnp.empty(x1.size, device=device)
+    getattr(dpnp, func)(x1, x2, out=result)
+
+    numpy.testing.assert_array_equal(result, expected)
+
+    expected_queue = x1.get_array().sycl_queue
+    result_queue = result.get_array().sycl_queue
+    assert_sycl_queue_equal(result_queue, expected_queue)
+    assert result_queue.sycl_device == expected_queue.sycl_device
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_modf(device):
+    """Compare both arrays returned by dpnp.modf with numpy and with the input's SYCL queue."""
+    data = [0, 3.5]
+    ref_first, ref_second = numpy.modf(numpy.array(data))
+
+    x = dpnp.array(data, device=device)
+    out_first, out_second = dpnp.modf(x)
+
+    # values are compared before any queue is inspected
+    for actual, reference in ((out_first, ref_first), (out_second, ref_second)):
+        numpy.testing.assert_array_equal(actual, reference)
+
+    input_queue = x.get_array().sycl_queue
+    output_queues = [out.get_array().sycl_queue for out in (out_first, out_second)]
+
+    # every output queue is checked with the file's queue helper against the input's queue ...
+    for queue in output_queues:
+        assert_sycl_queue_equal(queue, input_queue)
+    # ... and must report the same device as the input's queue
+    for queue in output_queues:
+        assert queue.sycl_device == input_queue.sycl_device

From 9f7d19958d19e8228115659e94384c6c77d81431 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Tue, 22 Feb 2022 11:56:05 -0600
Subject: [PATCH 04/18] Add copy_when_nondefault_queue=False to calls of
 get_dpnp_descriptor

---
 dpnp/dpnp_algo/dpnp_algo_manipulation.pyx |  3 +-
 dpnp/dpnp_algo/dpnp_algo_statistics.pyx   |  3 +-
 dpnp/dpnp_iface_arraycreation.py          | 20 +++---
 dpnp/dpnp_iface_bitwise.py                | 12 ++--
 dpnp/dpnp_iface_counting.py               |  2 +-
 dpnp/dpnp_iface_indexing.py               | 44 ++++++-------
 dpnp/dpnp_iface_linearalgebra.py          | 22 +++----
 dpnp/dpnp_iface_logic.py                  |  8 +--
 dpnp/dpnp_iface_manipulation.py           | 28 ++++----
 dpnp/dpnp_iface_searching.py              |  4 +-
 dpnp/dpnp_iface_sorting.py                | 10 +--
 dpnp/dpnp_iface_statistics.py             | 24 +++----
 dpnp/dpnp_iface_trigonometric.py          | 78 +++++++++++------------
 13 files changed, 130 insertions(+), 128 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
index b2b0c4f9068e..0a51b44e7e52 100644
--- a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
@@ -227,7 +227,8 @@ cpdef utils.dpnp_descriptor dpnp_reshape(utils.dpnp_descriptor array1, newshape,
                                                order=order,
                                                device=array1_obj.sycl_device,
                                                usm_type=array1_obj.usm_type,
-                                               sycl_queue=array1_obj.sycl_queue))
+                                               sycl_queue=array1_obj.sycl_queue),
+                                    copy_when_nondefault_queue=False)
 
 
 cpdef utils.dpnp_descriptor dpnp_transpose(utils.dpnp_descriptor array1, axes=None):
diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
index 872209df371f..920068c7d631 100644
--- a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
@@ -570,7 +570,8 @@ cpdef utils.dpnp_descriptor dpnp_min(utils.dpnp_descriptor x1, axis):
 
 cpdef utils.dpnp_descriptor dpnp_nanvar(utils.dpnp_descriptor arr, ddof):
     # dpnp_isnan does not support USM array as input in comparison to dpnp.isnan
-    cdef utils.dpnp_descriptor mask_arr = dpnp.get_dpnp_descriptor(dpnp.isnan(arr.get_pyobj()))
+    cdef utils.dpnp_descriptor mask_arr = dpnp.get_dpnp_descriptor(dpnp.isnan(arr.get_pyobj()),
+                                                                   copy_when_nondefault_queue=False)
     n = dpnp.count_nonzero(mask_arr.get_pyobj())
     res_size = int(arr.size - n)
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype)
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index cf0d5087d702..44c7b88c3c78 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -431,7 +431,7 @@ def diag(x1, k=0):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if not isinstance(k, int):
             pass
@@ -465,10 +465,10 @@ def diagflat(x1, k=0):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         input_ravel = dpnp.ravel(x1)
-        input_ravel_desc = dpnp.get_dpnp_descriptor(input_ravel)
+        input_ravel_desc = dpnp.get_dpnp_descriptor(input_ravel, copy_when_nondefault_queue=False)
 
         return dpnp_diag(input_ravel_desc, k).get_pyobj()
 
@@ -1138,7 +1138,7 @@ def ones_like(x1, dtype=None, order='C', subok=False, shape=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if order not in ('C', 'c', None):
             pass
@@ -1164,7 +1164,7 @@ def ptp(arr, axis=None, out=None, keepdims=numpy._NoValue):
     Input array is supported as :obj:`dpnp.ndarray`.
     Parameters ``out`` and ``keepdims`` are supported only with default values.
     """
-    arr_desc = dpnp.get_dpnp_descriptor(arr)
+    arr_desc = dpnp.get_dpnp_descriptor(arr, copy_when_nondefault_queue=False)
     if not arr_desc:
         pass
     elif axis is not None and not isinstance(axis, int):
@@ -1194,7 +1194,7 @@ def trace(x1, offset=0, axis1=0, axis2=1, dtype=None, out=None):
        Parameters ``axis1``, ``axis2``, ``out`` and ``dtype`` are supported only with default values.
        """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if x1_desc.size == 0:
             pass
@@ -1271,7 +1271,7 @@ def tril(x1, k=0):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if not isinstance(k, int):
             pass
@@ -1301,7 +1301,7 @@ def triu(x1, k=0):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if not isinstance(k, int):
             pass
@@ -1340,7 +1340,7 @@ def vander(x1, N=None, increasing=False):
            [  1,   5,  25, 125]])
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if x1.ndim != 1:
             pass
@@ -1425,7 +1425,7 @@ def zeros_like(x1, dtype=None, order='C', subok=False, shape=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if order not in ('C', 'c', None):
             pass
diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py
index 51c41504d421..51a28b0464ea 100644
--- a/dpnp/dpnp_iface_bitwise.py
+++ b/dpnp/dpnp_iface_bitwise.py
@@ -63,10 +63,10 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -90,7 +90,7 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T
         elif not where:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             return dpnp_func(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
 
     return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
@@ -228,7 +228,7 @@ def invert(x, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x)
+    x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_invert(x1_desc).get_pyobj()
 
diff --git a/dpnp/dpnp_iface_counting.py b/dpnp/dpnp_iface_counting.py
index 6ea5b1c6eaf9..9f14e3f36bfb 100644
--- a/dpnp/dpnp_iface_counting.py
+++ b/dpnp/dpnp_iface_counting.py
@@ -75,7 +75,7 @@ def count_nonzero(x1, axis=None, *, keepdims=False):
     5
 
     """
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis is not None:
             pass
diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py
index fa5204650d03..6ff554d89d3a 100644
--- a/dpnp/dpnp_iface_indexing.py
+++ b/dpnp/dpnp_iface_indexing.py
@@ -81,11 +81,11 @@ def choose(x1, choices, out=None, mode='raise'):
     --------
     :obj:`take_along_axis` : Preferable if choices is an array.
     """
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
 
     choices_list = []
     for choice in choices:
-        choices_list.append(dpnp.get_dpnp_descriptor(choice))
+        choices_list.append(dpnp.get_dpnp_descriptor(choice, copy_when_nondefault_queue=False))
 
     if x1_desc:
         if any(not desc for desc in choices_list):
@@ -186,7 +186,7 @@ def diag_indices_from(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         # original limitation
         if not x1_desc.ndim >= 2:
@@ -216,7 +216,7 @@ def diagonal(x1, offset=0, axis1=0, axis2=1):
     Otherwise the function will be executed sequentially on CPU.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if not isinstance(offset, int):
             pass
@@ -248,7 +248,7 @@ def fill_diagonal(x1, val, wrap=False):
     :obj:`dpnp.diag_indices_from` : Return the indices to access the main diagonal of an n-dimensional array.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         if not dpnp.isscalar(val):
             pass
@@ -329,7 +329,7 @@ def nonzero(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_nonzero(x1_desc)
 
@@ -347,9 +347,9 @@ def place(x1, mask, vals):
     Parameter ``vals`` is supported as 1-D sequence.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    mask_desc = dpnp.get_dpnp_descriptor(mask)
-    vals_desc = dpnp.get_dpnp_descriptor(vals)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    mask_desc = dpnp.get_dpnp_descriptor(mask, copy_when_nondefault_queue=False)
+    vals_desc = dpnp.get_dpnp_descriptor(vals, copy_when_nondefault_queue=False)
     if x1_desc and mask_desc and vals_desc:
         return dpnp_place(x1_desc, mask, vals_desc)
 
@@ -367,7 +367,7 @@ def put(x1, ind, v, mode='raise'):
     Not supported parameter mode.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         if mode != 'raise':
             pass
@@ -391,9 +391,9 @@ def put_along_axis(x1, indices, values, axis):
     :obj:`take_along_axis` : Take values from the input array by matching 1d index and data slices.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    indices_desc = dpnp.get_dpnp_descriptor(indices)
-    values_desc = dpnp.get_dpnp_descriptor(values)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    indices_desc = dpnp.get_dpnp_descriptor(indices, copy_when_nondefault_queue=False)
+    values_desc = dpnp.get_dpnp_descriptor(values, copy_when_nondefault_queue=False)
     if x1_desc and indices_desc and values_desc:
         if x1_desc.ndim != indices_desc.ndim:
             pass
@@ -419,9 +419,9 @@ def putmask(x1, mask, values):
     Input arrays ``arr``, ``mask`` and ``values``  are supported as :obj:`dpnp.ndarray`.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    mask_desc = dpnp.get_dpnp_descriptor(mask)
-    values_desc = dpnp.get_dpnp_descriptor(values)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    mask_desc = dpnp.get_dpnp_descriptor(mask, copy_when_nondefault_queue=False)
+    values_desc = dpnp.get_dpnp_descriptor(values, copy_when_nondefault_queue=False)
     if x1_desc and mask_desc and values_desc:
         return dpnp_putmask(x1_desc, mask_desc, values_desc)
 
@@ -477,8 +477,8 @@ def take(x1, indices, axis=None, out=None, mode='raise'):
     :obj:`take_along_axis` : Take elements by matching the array and the index arrays.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    indices_desc = dpnp.get_dpnp_descriptor(indices)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    indices_desc = dpnp.get_dpnp_descriptor(indices, copy_when_nondefault_queue=False)
     if x1_desc and indices_desc:
         if axis is not None:
             pass
@@ -503,8 +503,8 @@ def take_along_axis(x1, indices, axis):
     :obj:`put_along_axis` : Put values into the destination array by matching 1d index and data slices.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    indices_desc = dpnp.get_dpnp_descriptor(indices)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    indices_desc = dpnp.get_dpnp_descriptor(indices, copy_when_nondefault_queue=False)
     if x1_desc and indices_desc:
         if x1_desc.ndim != indices_desc.ndim:
             pass
@@ -578,7 +578,7 @@ def tril_indices_from(x1, k=0):
         Diagonal offset (see `tril` for details).
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if isinstance(k, int):
             return dpnp_tril_indices_from(x1_desc, k)
@@ -635,7 +635,7 @@ def triu_indices_from(x1, k=0):
         Diagonal offset (see `tril` for details).
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if isinstance(k, int):
             return dpnp_triu_indices_from(x1_desc, k)
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index cf0e0ed2a212..7cd76d7c22b5 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -91,8 +91,8 @@ def dot(x1, x2, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc and x2_desc and not kwargs:
         # TODO: remove fallback with scalars when muliply backend func will support strides
         if(x1_desc.ndim == 0 and x2_desc.strides is not None
@@ -183,8 +183,8 @@ def inner(x1, x2, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
     if x1_desc and x2_desc and not kwargs:
         return dpnp_inner(x1_desc, x2_desc).get_pyobj()
 
@@ -201,8 +201,8 @@ def kron(x1, x2):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
     if x1_desc and x2_desc:
         return dpnp_kron(x1_desc, x2_desc).get_pyobj()
 
@@ -277,7 +277,7 @@ def matmul(x1, x2, out=None, **kwargs):
                 if (array1_size > cost_size) and (array2_size > cost_size):
                     return dpnp_matmul(x1_desc, x2_desc, out)
             else:
-                out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
                 return dpnp_matmul(x1_desc, x2_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.matmul, x1, x2, out=out, **kwargs)
@@ -312,8 +312,8 @@ def outer(x1, x2, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
     if x1_desc and x2_desc and not kwargs:
         return dpnp_outer(x1_desc, x2_desc).get_pyobj()
 
@@ -350,8 +350,8 @@ def tensordot(x1, x2, axes=2):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
     if x1_desc and x2_desc and (axes == 1):
         return dpnp_tensordot_not_implemented(x1_desc, x2_desc)  # dpnp_matmul
 
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index aa2f5159986a..0f1e1b5fc0e5 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -108,7 +108,7 @@ def all(x1, axis=None, out=None, keepdims=False):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis is not None:
             pass
@@ -148,8 +148,8 @@ def allclose(x1, x2, rtol=1.e-5, atol=1.e-8, **kwargs):
 
     rtol_is_scalar = dpnp.isscalar(rtol)
     atol_is_scalar = dpnp.isscalar(atol)
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not rtol_is_scalar or not atol_is_scalar:
@@ -202,7 +202,7 @@ def any(x1, axis=None, out=None, keepdims=False):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis is not None:
             pass
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 6a06db4cb5da..45938f0d52ad 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -85,7 +85,7 @@ def asfarray(x1, dtype=numpy.float64):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         # behavior of original function: int types replaced with float64
         if numpy.issubdtype(dtype, numpy.integer):
@@ -131,7 +131,7 @@ def atleast_2d(*arys):
     all_is_array = True
     arys_desc = []
     for ary in arys:
-        ary_desc = dpnp.get_dpnp_descriptor(ary)
+        ary_desc = dpnp.get_dpnp_descriptor(ary, copy_when_nondefault_queue=False)
         if ary_desc:
             arys_desc.append(ary_desc)
         else:
@@ -166,7 +166,7 @@ def atleast_3d(*arys):
     all_is_array = True
     arys_desc = []
     for ary in arys:
-        ary_desc = dpnp.get_dpnp_descriptor(ary)
+        ary_desc = dpnp.get_dpnp_descriptor(ary, copy_when_nondefault_queue=False)
         if ary_desc:
             arys_desc.append(ary_desc)
         else:
@@ -232,8 +232,8 @@ def copyto(dst, src, casting='same_kind', where=True):
 
     """
 
-    dst_desc = dpnp.get_dpnp_descriptor(dst, copy_when_strides=False)
-    src_desc = dpnp.get_dpnp_descriptor(src)
+    dst_desc = dpnp.get_dpnp_descriptor(dst, copy_when_strides=False, copy_when_nondefault_queue=False)
+    src_desc = dpnp.get_dpnp_descriptor(src, copy_when_nondefault_queue=False)
     if dst_desc and src_desc:
         if casting != 'same_kind':
             pass
@@ -315,7 +315,7 @@ def expand_dims(x1, axis):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_expand_dims(x1_desc, axis).get_pyobj()
 
@@ -370,7 +370,7 @@ def moveaxis(x1, source, destination):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         source_norm = normalize_axis(source, x1_desc.ndim)
         destination_norm = normalize_axis(destination, x1_desc.ndim)
@@ -418,7 +418,7 @@ def ravel(x1, order='C'):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_flatten(x1_desc).get_pyobj()
 
@@ -451,7 +451,7 @@ def repeat(x1, repeats, axis=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis is not None and axis != 0:
             pass
@@ -478,7 +478,7 @@ def reshape(x1, newshape, order='C'):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if order != 'C':
             pass
@@ -521,7 +521,7 @@ def rollaxis(x1, axis, start=0):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if not isinstance(axis, int):
             pass
@@ -570,7 +570,7 @@ def squeeze(x1, axis=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_squeeze(x1_desc, axis).get_pyobj()
 
@@ -614,7 +614,7 @@ def swapaxes(x1, axis1, axis2):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis1 >= x1_desc.ndim:
             pass
@@ -665,7 +665,7 @@ def transpose(x1, axes=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axes is not None:
             if not any(axes):
diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py
index a5d3cfa671ae..cef5d686035b 100644
--- a/dpnp/dpnp_iface_searching.py
+++ b/dpnp/dpnp_iface_searching.py
@@ -95,7 +95,7 @@ def argmax(x1, axis=None, out=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis is not None:
             pass
@@ -150,7 +150,7 @@ def argmin(x1, axis=None, out=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis is not None:
             pass
diff --git a/dpnp/dpnp_iface_sorting.py b/dpnp/dpnp_iface_sorting.py
index 2e01335e9b0c..cdce86cbacc4 100644
--- a/dpnp/dpnp_iface_sorting.py
+++ b/dpnp/dpnp_iface_sorting.py
@@ -89,7 +89,7 @@ def argsort(in_array1, axis=-1, kind=None, order=None):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(in_array1)
+    x1_desc = dpnp.get_dpnp_descriptor(in_array1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis != -1:
             pass
@@ -115,7 +115,7 @@ def partition(x1, kth, axis=-1, kind='introselect', order=None):
     Parameters ``axis``, ``kind`` and ``order`` are supported only with default values.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if not isinstance(kth, int):
             pass
@@ -148,8 +148,8 @@ def searchsorted(x1, x2, side='left', sorter=None):
     Parameters ``sorter`` is supported only with default values.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
     if 0 and x1_desc and x2_desc:
         if x1_desc.ndim != 1:
             pass
@@ -198,7 +198,7 @@ def sort(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         if x1_desc.ndim != 1:
             pass
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
index 960ee6bb51da..27eaf4a115f5 100644
--- a/dpnp/dpnp_iface_statistics.py
+++ b/dpnp/dpnp_iface_statistics.py
@@ -154,7 +154,7 @@ def average(x1, axis=None, weights=None, returned=False):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis is not None:
             pass
@@ -221,8 +221,8 @@ def correlate(x1, x2, mode='valid'):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
-    x2_desc = dpnp.get_dpnp_descriptor(x2)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
     if x1_desc and x2_desc:
         if x1_desc.size != x2_desc.size or x1_desc.size == 0:
             pass
@@ -274,7 +274,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if x1_desc.ndim > 2:
             pass
@@ -292,7 +292,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
             pass
         else:
             if x1_desc.dtype != dpnp.float64:
-                x1_desc = dpnp.get_dpnp_descriptor(dpnp.astype(x1, dpnp.float64))
+                x1_desc = dpnp.get_dpnp_descriptor(dpnp.astype(x1, dpnp.float64), copy_when_nondefault_queue=False)
 
             return dpnp_cov(x1_desc).get_pyobj()
 
@@ -350,7 +350,7 @@ def max(x1, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         # Negative values in 'shape' are not allowed in input array
         # 306-322 check on negative and duplicate axis
@@ -427,7 +427,7 @@ def mean(x1, axis=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         if x1_desc.size == 0:
             pass
@@ -473,7 +473,7 @@ def median(x1, axis=None, out=None, overwrite_input=False, keepdims=False):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if axis is not None:
             pass
@@ -516,7 +516,7 @@ def min(x1, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if out is not None:
             pass
@@ -551,7 +551,7 @@ def nanvar(x1, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
     Otherwise the function will be executed sequentially on CPU.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if x1.size == 0:
             pass
@@ -609,7 +609,7 @@ def std(x1, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if x1_desc.size == 0:
             pass
@@ -667,7 +667,7 @@ def var(x1, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         if x1_desc.size == 0:
             pass
diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py
index 841bcde54325..c50ec260adae 100644
--- a/dpnp/dpnp_iface_trigonometric.py
+++ b/dpnp/dpnp_iface_trigonometric.py
@@ -108,7 +108,7 @@ def arccos(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_arccos(x1_desc).get_pyobj()
 
@@ -145,7 +145,7 @@ def arccosh(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_arccosh(x1_desc).get_pyobj()
 
@@ -184,9 +184,9 @@ def arcsin(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_arcsin(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.arcsin, x1, out=out, **kwargs)
@@ -214,7 +214,7 @@ def arcsinh(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_arcsinh(x1_desc).get_pyobj()
 
@@ -249,9 +249,9 @@ def arctan(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_arctan(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.arctan, x1, out=out, **kwargs)
@@ -278,7 +278,7 @@ def arctanh(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_arctanh(x1_desc).get_pyobj()
 
@@ -306,7 +306,7 @@ def cbrt(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_cbrt(x1_desc).get_pyobj()
 
@@ -346,8 +346,8 @@ def arctan2(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -365,7 +365,7 @@ def arctan2(x1, x2, dtype=None, out=None, where=True, **kwargs):
         elif not where:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             return dpnp_arctan2(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
 
     return call_origin(numpy.arctan2, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
@@ -393,9 +393,9 @@ def cos(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_cos(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.cos, x1, out=out, **kwargs)
@@ -422,7 +422,7 @@ def cosh(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_cosh(x1_desc).get_pyobj()
 
@@ -473,7 +473,7 @@ def degrees(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_degrees(x1_desc).get_pyobj()
 
@@ -506,9 +506,9 @@ def exp(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_exp(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.exp, x1, out=out, **kwargs)
@@ -540,7 +540,7 @@ def exp2(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_exp2(x1_desc).get_pyobj()
 
@@ -570,7 +570,7 @@ def expm1(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_expm1(x1_desc).get_pyobj()
 
@@ -604,8 +604,8 @@ def hypot(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     x1_is_scalar = dpnp.isscalar(x1)
     x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
 
     if x1_desc and x2_desc and not kwargs:
         if not x1_desc and not x1_is_scalar:
@@ -625,7 +625,7 @@ def hypot(x1, x2, dtype=None, out=None, where=True, **kwargs):
         elif not where:
             pass
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             return dpnp_hypot(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
 
     return call_origin(numpy.hypot, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
@@ -661,9 +661,9 @@ def log(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_log(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.log, x1, out=out, **kwargs)
@@ -690,7 +690,7 @@ def log10(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_log10(x1_desc).get_pyobj()
 
@@ -722,7 +722,7 @@ def log1p(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_log1p(x1_desc).get_pyobj()
 
@@ -758,7 +758,7 @@ def log2(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_log2(x1_desc).get_pyobj()
 
@@ -788,7 +788,7 @@ def reciprocal(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc and not kwargs:
         return dpnp_recip(x1_desc).get_pyobj()
 
@@ -838,7 +838,7 @@ def radians(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_radians(x1_desc).get_pyobj()
 
@@ -876,9 +876,9 @@ def sin(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_sin(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.sin, x1, out=out, **kwargs)
@@ -906,7 +906,7 @@ def sinh(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_sinh(x1_desc).get_pyobj()
 
@@ -935,7 +935,7 @@ def sqrt(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_sqrt(x1_desc).get_pyobj()
 
@@ -970,7 +970,7 @@ def square(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_square(x1_desc).get_pyobj()
 
@@ -1000,9 +1000,9 @@ def tan(x1, out=None, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
-        out_desc = dpnp.get_dpnp_descriptor(out) if out is not None else None
+        out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
         return dpnp_tan(x1_desc, out_desc).get_pyobj()
 
     return call_origin(numpy.tan, x1, out=out, **kwargs)
@@ -1030,7 +1030,7 @@ def tanh(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_tanh(x1_desc).get_pyobj()
 
@@ -1066,7 +1066,7 @@ def unwrap(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1)
+    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
         return dpnp_unwrap(x1_desc).get_pyobj()
 

From 8431a6473ae9a1b488904054cca526e124f5e942 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Thu, 24 Feb 2022 04:39:28 -0600
Subject: [PATCH 05/18] Fix conjugate

---
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 2 +-
 tests/skipped_tests.tbl                     | 3 ---
 tests/skipped_tests_gpu.tbl                 | 3 ---
 3 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index e9864d3da8a9..98aff7eac758 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -724,7 +724,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_copy_c_ext<float>};
     fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_copy_c_ext<double>};
     fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_C128][eft_C128] = {
-        eft_C128, (void*)dpnp_copy_c_ext<std::complex<double>>};
+        eft_C128, (void*)dpnp_conjugate_c_ext<std::complex<double>>};
 
     fmap[DPNPFuncName::DPNP_FN_COPY][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_copy_c_default<bool>};
     fmap[DPNPFuncName::DPNP_FN_COPY][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_copy_c_default<int32_t>};
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 5692e09e014b..f9863951f5b2 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -1,8 +1,5 @@
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
-tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]
-tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25]
 tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22]
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 833fc84c6b25..15c48a564006 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,8 +1,5 @@
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
-tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]
-tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25]
 tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22]

From 6783c04ced53776206464f1c861dc9d853228625 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Wed, 2 Mar 2022 18:19:52 +0300
Subject: [PATCH 06/18] Skip tests which test function abs

---
 tests/skipped_tests_gpu.tbl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 15c48a564006..859b8dc80e72 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,3 +1,7 @@
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0]
+tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-abs-data0]
+
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]

From 9a496d8796894e015ab0b3e5e32ecb9fafe916eb Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Wed, 2 Mar 2022 22:09:51 +0300
Subject: [PATCH 07/18] Skip tests which test function ceil

---
 tests/skipped_tests_gpu.tbl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 859b8dc80e72..4959eb24c5ac 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,6 +1,9 @@
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-abs-data0]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
+tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-ceil-data1]
 
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]

From 0ac3872e94c827ff86918630004d6070d03b893d Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Wed, 2 Mar 2022 22:59:18 +0300
Subject: [PATCH 08/18] Skip tests which test function conjugate

---
 tests/skipped_tests_gpu.tbl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 4959eb24c5ac..eaa571f8d02f 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -4,6 +4,9 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-ceil-data1]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2]
 
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]

From 181490e968d00e07adc1817d9c41eef30b856d42 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Thu, 3 Mar 2022 11:18:32 +0300
Subject: [PATCH 09/18] Skip level_zero tests

---
 tests/skipped_tests_gpu.tbl | 56 +++++++++++++++++++++++++++++++++----
 1 file changed, 50 insertions(+), 6 deletions(-)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index eaa571f8d02f..0d207e7a01b3 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,12 +1,56 @@
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0]
-tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
-tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-ceil-data1]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
-tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumprod-data4]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumsum-data5]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-diff-data6]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ediff1d-data7]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-fabs-data8]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-floor-data9]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-gradient-data10]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumprod-data11]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumsum-data12]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nanprod-data13]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nansum-data14]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-negative-data15]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-prod-data16]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-sign-data17]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-sum-data18]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-trapz-data19]
+tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-trunc-data20]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-add-data10-data20]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-copysign-data11-data21]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-cross-data12-data22]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-divide-data13-data23]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-floor_divide-data14-data24]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-fmod-data15-data25]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-maximum-data16-data26]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-minimum-data17-data27]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-multiply-data18-data28]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-power-data19-data29]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-remainder-data110-data210]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-subtract-data111-data211]
+tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-matmul-data112-data212]
+tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-add-data10-data20]
+tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-divide-data11-data21]
+tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-floor_divide-data12-data22]
+tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-fmod-data13-data23]
+tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-multiply-data14-data24]
+tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-remainder-data15-data25]
+tests/test_sycl_queue.py::test_broadcasting[level_zero:gpu:0-subtract-data16-data26]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-add-data10-data20]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-copysign-data11-data21]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-divide-data12-data22]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-floor_divide-data13-data23]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-fmod-data14-data24]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-maximum-data15-data25]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-minimum-data16-data26]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-multiply-data17-data27]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-power-data18-data28]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-remainder-data19-data29]
+tests/test_sycl_queue.py::test_out[level_zero:gpu:0-subtract-data110-data210]
+tests/test_sycl_queue.py::test_modf[level_zero:gpu:0]
 
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]

From dd92cdb816d61f8da0d0b2278d3f5af6ec2859ad Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Fri, 4 Mar 2022 10:34:10 +0300
Subject: [PATCH 10/18] Skip all tests from test_sycl_queue.py

---
 tests/skipped_tests_gpu.tbl | 54 +++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 0d207e7a01b3..a9c4cdd335e0 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,3 +1,57 @@
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-divide-data13-data23]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-floor_divide-data14-data24]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-fmod-data15-data25]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-maximum-data16-data26]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-minimum-data17-data27]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-multiply-data18-data28]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data19-data29]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-remainder-data110-data210]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-subtract-data111-data211]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-matmul-data112-data212]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-fmod-data13-data23]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-multiply-data14-data24]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-subtract-data16-data26]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-add-data10-data20]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-copysign-data11-data21]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-divide-data12-data22]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-floor_divide-data13-data23]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-fmod-data14-data24]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-maximum-data15-data25]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-minimum-data16-data26]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-multiply-data17-data27]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-power-data18-data28]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-remainder-data19-data29]
+tests/test_sycl_queue.py::test_out[opencl:gpu:0-subtract-data110-data210]
+tests/test_sycl_queue.py::test_modf[opencl:gpu:0]
+
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]

From eb8dd52b0d2a3df409680975f3d75b913fae966a Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Fri, 4 Mar 2022 13:50:34 +0300
Subject: [PATCH 11/18] Unskip test for modf

---
 tests/skipped_tests_gpu.tbl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index a9c4cdd335e0..fbc0bf813080 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -50,7 +50,6 @@ tests/test_sycl_queue.py::test_out[opencl:gpu:0-multiply-data17-data27]
 tests/test_sycl_queue.py::test_out[opencl:gpu:0-power-data18-data28]
 tests/test_sycl_queue.py::test_out[opencl:gpu:0-remainder-data19-data29]
 tests/test_sycl_queue.py::test_out[opencl:gpu:0-subtract-data110-data210]
-tests/test_sycl_queue.py::test_modf[opencl:gpu:0]
 
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]

From 3f99c620a2507de859ab61336dbf1e95a132da55 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Fri, 4 Mar 2022 15:13:34 +0300
Subject: [PATCH 12/18] Unskip tests/test_sycl_queue.py::test_out

---
 tests/skipped_tests_gpu.tbl | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index fbc0bf813080..a98cc18a9c90 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -39,17 +39,6 @@ tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-fmod-data13-data23]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-multiply-data14-data24]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-subtract-data16-data26]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-add-data10-data20]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-copysign-data11-data21]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-divide-data12-data22]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-floor_divide-data13-data23]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-fmod-data14-data24]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-maximum-data15-data25]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-minimum-data16-data26]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-multiply-data17-data27]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-power-data18-data28]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-remainder-data19-data29]
-tests/test_sycl_queue.py::test_out[opencl:gpu:0-subtract-data110-data210]
 
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]

From f4f466a0855d552d2ee29e00326298db1083bcb7 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Fri, 4 Mar 2022 16:01:18 +0300
Subject: [PATCH 13/18] Unskip tests/test_sycl_queue.py::test_broadcasting

---
 tests/skipped_tests_gpu.tbl | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index a98cc18a9c90..618b72a61498 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -32,13 +32,6 @@ tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data19-data29]
 tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-remainder-data110-data210]
 tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-subtract-data111-data211]
 tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-matmul-data112-data212]
-tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20]
-tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21]
-tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]
-tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-fmod-data13-data23]
-tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-multiply-data14-data24]
-tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25]
-tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-subtract-data16-data26]
 
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]

From 47edea45f4578bc3e2437f525b73e7c01835a6b5 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Fri, 4 Mar 2022 17:37:36 +0300
Subject: [PATCH 14/18] Unskip test_1in_1out and test_2in_1out

---
 tests/skipped_tests_gpu.tbl | 41 +++++++------------------------------
 1 file changed, 7 insertions(+), 34 deletions(-)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 618b72a61498..84267f8a30d7 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,37 +1,10 @@
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-divide-data13-data23]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-floor_divide-data14-data24]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-fmod-data15-data25]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-maximum-data16-data26]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-minimum-data17-data27]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-multiply-data18-data28]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data19-data29]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-remainder-data110-data210]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-subtract-data111-data211]
-tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-matmul-data112-data212]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-fmod-data13-data23]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-multiply-data14-data24]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25]
+tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-subtract-data16-data26]
 
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]

From 8e632084d5ede75196cc19024f0bd3ef0f5d661e Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Fri, 4 Mar 2022 20:20:50 +0300
Subject: [PATCH 15/18] Unskip test_2in_1out

---
 tests/skipped_tests_gpu.tbl | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 84267f8a30d7..ae77cd92f512 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,3 +1,24 @@
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]

From 39f8b8728ce0e0670f258e26ce385ce035772539 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Fri, 4 Mar 2022 21:48:53 +0300
Subject: [PATCH 16/18] Unskip test_1in_1out

---
 tests/skipped_tests_gpu.tbl | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index ae77cd92f512..0f461ed6c833 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,24 +1,16 @@
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-divide-data13-data23]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-floor_divide-data14-data24]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-fmod-data15-data25]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-maximum-data16-data26]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-minimum-data17-data27]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-multiply-data18-data28]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data19-data29]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-remainder-data110-data210]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-subtract-data111-data211]
+tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-matmul-data112-data212]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-add-data10-data20]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-divide-data11-data21]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]

From 78d6e28588ef9dbf81d19377875de1d006e22a64 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Sat, 5 Mar 2022 10:22:07 +0300
Subject: [PATCH 17/18] Skip test_1in_1out

---
 tests/skipped_tests_gpu.tbl | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 0f461ed6c833..a98cc18a9c90 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1,3 +1,24 @@
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
+tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20]
 tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20]
 tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21]
 tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22]

From 174c0f1fb7d39e15ca9a4e8b917404462ec75283 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <denis.smirnov@intel.com>
Date: Sat, 5 Mar 2022 12:13:23 +0300
Subject: [PATCH 18/18] Unskip half of test_1in_1out

---
 tests/skipped_tests_gpu.tbl | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index a98cc18a9c90..cf173445b6d2 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -8,17 +8,7 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumprod-data11]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nancumsum-data12]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nanprod-data13]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-nansum-data14]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-negative-data15]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-prod-data16]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sign-data17]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-sum-data18]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trunc-data20]
+
 tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-add-data10-data20]
 tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-copysign-data11-data21]
 tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-cross-data12-data22]