diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 17140158deee..b5edc3985308 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -2,4 +2,5 @@ - [ ] Have you added a test, reproducer or referred to issue with a reproducer? - [ ] Have you tested your changes locally for CPU and GPU devices? - [ ] Have you made sure that new changes do not introduce compiler warnings? +- [ ] Have you checked performance impact of proposed changes? - [ ] If this PR is a work in progress, are you filing the PR as a draft? diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 0964f18df81e..9cd41f11cb19 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -138,9 +138,9 @@ MACRO_2ARG_3TYPES_OP(dpnp_divide_c, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, - sycl::fmod((double)input1_elem, (double)input2_elem), - nullptr, - std::false_type, + dispatch_fmod_op(input1_elem, input2_elem), + x1 % x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::fmod, MACRO_UNPACK_TYPES(float, double)) diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index f3d8a4a95ccd..6264d5d7146d 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -281,8 +281,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ + const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 741a945fb099..2b3417ee0011 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -111,7 +111,7 @@ size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ \ size_t input_id = 0; \ for (size_t i = 0; i < input1_ndim; ++i) \ @@ -635,7 +635,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap) size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ \ size_t input_id = 0; \ for (size_t i = 0; i < input1_ndim; ++i) \ @@ -848,6 +848,18 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) return; } +template +constexpr auto dispatch_fmod_op(T elem1, T elem2) +{ + if constexpr (is_any_v) + { + return elem1 % elem2; + } + else + { + return sycl::fmod(elem1, elem2); + } +} #define MACRO_2ARG_3TYPES_OP( \ __name__, __operation__, __vec_operation__, __vec_types__, 
__mkl_operation__, __mkl_types__) \ @@ -995,8 +1007,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ + const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ @@ -1261,6 +1273,16 @@ static constexpr DPNPFuncType get_divide_res_type() return widest_type; } +template +static constexpr DPNPFuncType get_fmod_res_type() +{ + if constexpr ((FT1 == DPNPFuncType::DPNP_FT_BOOL) && (FT2 == DPNPFuncType::DPNP_FT_BOOL)) + { + return DPNPFuncType::DPNP_FT_INT; + } + return populate_func_types(); +} + template static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) { @@ -1300,12 +1322,29 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) ...); } +template +static void func_map_elemwise_2arg_3type_core_no_complex(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][FT1][FTs] = + {get_fmod_res_type(), + (void*)dpnp_fmod_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); +} + template static void func_map_elemwise_2arg_3type_helper(func_map_t& fmap) { ((func_map_elemwise_2arg_3type_core(fmap)), ...); } +template +static void func_map_elemwise_2arg_3type_helper_no_complex(func_map_t& fmap) +{ + ((func_map_elemwise_2arg_3type_core_no_complex(fmap)), ...); +} + static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = {eft_INT, @@ -1539,39 +1578,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_FMOD][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_fmod_c_default}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_INT] = {eft_INT, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_LNG] = {eft_LNG, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_FLT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_DBL] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_INT] = {eft_LNG, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_LNG] = {eft_LNG, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_FLT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_DBL] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_INT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_LNG] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_FLT] = {eft_FLT, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_DBL] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_INT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_LNG] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_FLT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_DBL] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - 
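The `dpnp_fmod_c` kernel above now routes through `dispatch_fmod_op`, which applies the integer `%` operator for `bool`, `std::int32_t` and `std::int64_t` element types and falls back to `sycl::fmod` otherwise, with `bool`/`bool` results promoted to `int` via `get_fmod_res_type`. Below is a minimal pure-Python sketch of that dispatch rule; the function name and the use of `math.fmod` are illustrative only and are not part of the dpnp backend.

```python
import math

def dispatch_fmod(elem1, elem2):
    """Pure-Python sketch of dispatch_fmod_op: integral inputs take the
    C-style '%' branch, everything else goes through a floating fmod."""
    if isinstance(elem1, int) and isinstance(elem2, int):   # bool is a subclass of int
        # C++ '%' truncates toward zero, so emulate it with math.fmod rather
        # than Python's floor-based '%' operator
        return int(math.fmod(elem1, elem2))
    return math.fmod(elem1, elem2)

print(dispatch_fmod(7, 3))       # 1
print(dispatch_fmod(-7, 3))      # -1 (sign follows the dividend, as with C++ '%')
print(dispatch_fmod(-4.5, 2.0))  # -0.5
```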
fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_hypot_c_default}; fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_LNG] = {eft_DBL, @@ -1918,6 +1924,7 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) eft_DBL, (void*)dpnp_subtract_c_default}; func_map_elemwise_2arg_3type_helper(fmap); + func_map_elemwise_2arg_3type_helper_no_complex(fmap); return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index d1a6767c2adc..78a9a29e99e7 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -396,7 +396,7 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \ \ size_t input1_id = 0; \ \ @@ -635,8 +635,8 @@ static void func_map_logic_1arg_1type_helper(func_map_t& fmap) const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type *input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \ + const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ diff --git a/dpnp/backend/kernels/dpnp_krnl_searching.cpp b/dpnp/backend/kernels/dpnp_krnl_searching.cpp index fef5f78d15da..471d524643f5 100644 --- a/dpnp/backend/kernels/dpnp_krnl_searching.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_searching.cpp @@ -294,9 +294,9 @@ DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref, const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ { const shape_elem_type* result_strides_data = &dev_strides_data[0]; - const shape_elem_type* condition_strides_data = &dev_strides_data[1]; - const shape_elem_type* input1_strides_data = &dev_strides_data[2]; - const shape_elem_type* input2_strides_data = &dev_strides_data[3]; + const shape_elem_type* condition_strides_data = &dev_strides_data[result_ndim]; + const shape_elem_type* input1_strides_data = &dev_strides_data[2 * result_ndim]; + const shape_elem_type* input2_strides_data = &dev_strides_data[3 * result_ndim]; size_t condition_id = 0; size_t input1_id = 0; diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 2fa9de34b998..923454142f43 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -505,8 +505,23 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, return_type = kernel_data.return_type_no_fp64 func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64 - if out is None: - """ Create result array with type given by FPTR data """ + # check 'out' parameter data + if out is not None: + if out.shape != result_shape: + utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape) + + utils.get_common_usm_allocation(x1_obj, out) # check USM allocation is common + + if out is None or out.is_array_overlapped(x1_obj) or out.is_array_overlapped(x2_obj) or not out.match_ctype(return_type): + """ + Create result array with type given by FPTR data. 
+ If 'out' array has another dtype than expected or overlaps a memory from any input array, + we have to create a temporary array and to copy data from the temporary into 'out' array, + once the computation is completed. + Otherwise simultaneously access to the same memory may cause a race condition issue + which will result into undefined behaviour. + """ + is_result_memory_allocated = True result = utils.create_output_descriptor(result_shape, return_type, None, @@ -514,16 +529,9 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, usm_type=result_usm_type, sycl_queue=result_sycl_queue) else: - result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type) - if out.dtype != result_type: - utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type) - if out.shape != result_shape: - utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape) - + is_result_memory_allocated = False result = out - utils.get_common_usm_allocation(x1_obj, result) # check USM allocation is common - cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) result_obj = result.get_array() @@ -554,4 +562,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) - return result + if out is not None and is_result_memory_allocated: + return out.get_result_desc(result) + + return result.get_result_desc() diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index f2ccf56ef76b..5741ea0fa145 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -29,6 +29,23 @@ import dpnp + +def _get_unwrapped_index_key(key): + """ + Return a key where each nested instance of DPNP array is unwrapped into USM ndarray + for futher processing in DPCTL advanced indexing functions. + + """ + + if isinstance(key, tuple): + if any(isinstance(x, dpnp_array) for x in key): + # create a new tuple from the input key with unwrapped DPNP arrays + return tuple(x.get_array() if isinstance(x, dpnp_array) else x for x in key) + elif isinstance(key, dpnp_array): + return key.get_array() + return key + + class dpnp_array: """ Multi-dimensional array object. @@ -176,8 +193,7 @@ def __ge__(self, other): # '__getattribute__', def __getitem__(self, key): - if isinstance(key, dpnp_array): - key = key.get_array() + key = _get_unwrapped_index_key(key) item = self._array_obj.__getitem__(key) if not isinstance(item, dpt.usm_ndarray): @@ -194,7 +210,10 @@ def __gt__(self, other): return dpnp.greater(self, other) # '__hash__', - # '__iadd__', + + def __iadd__(self, other): + dpnp.add(self, other, out=self) + return self def __iand__(self, other): dpnp.bitwise_and(self, other, out=self) @@ -208,7 +227,10 @@ def __ilshift__(self, other): # '__imatmul__', # '__imod__', - # '__imul__', + + def __imul__(self, other): + dpnp.multiply(self, other, out=self) + return self def __index__(self): return self._array_obj.__index__() @@ -334,8 +356,8 @@ def __rxor__(self, other): # '__setattr__', def __setitem__(self, key, val): - if isinstance(key, dpnp_array): - key = key.get_array() + key = _get_unwrapped_index_key(key) + if isinstance(val, dpnp_array): val = val.get_array() @@ -760,6 +782,8 @@ def item(self, id=None): @property def itemsize(self): """ + Size of one array element in bytes. 
+ """ return self._array_obj.itemsize @@ -785,11 +809,20 @@ def min(self, axis=None, out=None, keepdims=numpy._NoValue, initial=numpy._NoVal return dpnp.min(self, axis, out, keepdims, initial, where) - # 'nbytes', + @property + def nbytes(self): + """ + Total bytes consumed by the elements of the array. + + """ + + return self._array_obj.nbytes @property def ndim(self): """ + Number of array dimensions. + """ return self._array_obj.ndim diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 12d28074b8fb..5bd6f460496f 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -86,22 +86,34 @@ def asarray(x1, usm_type=None, sycl_queue=None): """Converts `x1` to `dpnp_array`.""" - if isinstance(x1, dpnp_array): - x1_obj = x1.get_array() - else: - x1_obj = x1 + dpu.validate_usm_type(usm_type, allow_none=True) - sycl_queue_normalized = dpnp.get_normalized_queue_device(x1_obj, device=device, sycl_queue=sycl_queue) if order is None: order = 'C' """Converts incoming 'x1' object to 'dpnp_array'.""" - array_obj = dpt.asarray(x1_obj, - dtype=dtype, - copy=copy, - order=order, - usm_type=usm_type, - sycl_queue=sycl_queue_normalized) + if isinstance(x1, (list, tuple, range)): + array_obj = dpt.asarray(x1, + dtype=dtype, + copy=copy, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue) + else: + if isinstance(x1, dpnp_array): + x1_obj = x1.get_array() + else: + x1_obj = x1 + + sycl_queue_normalized = dpnp.get_normalized_queue_device(x1_obj, device=device, sycl_queue=sycl_queue) + + array_obj = dpt.asarray(x1_obj, + dtype=dtype, + copy=copy, + order=order, + usm_type=usm_type, + sycl_queue=sycl_queue_normalized) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 9bf456060ddd..6a5bcf239df2 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -272,6 +272,10 @@ def get_dpnp_descriptor(ext_obj, if use_origin_backend(): return False + # It's required to keep track of input object if a non-strided copy is going to be created. + # Thus there will be an extra descriptor allocated to refer on original input. + orig_desc = None + # If input object is a scalar, it means it was allocated on host memory. # We need to copy it to USM memory according to compute follows data paradigm. 
if isscalar(ext_obj): @@ -291,6 +295,7 @@ def get_dpnp_descriptor(ext_obj, ext_obj_offset = 0 if ext_obj.strides != shape_offsets or ext_obj_offset != 0: + orig_desc = dpnp_descriptor(ext_obj) ext_obj = array(ext_obj) # while dpnp functions are based on DPNP_QUEUE @@ -304,7 +309,7 @@ def get_dpnp_descriptor(ext_obj, if not queue_is_default: ext_obj = array(ext_obj, sycl_queue=default_queue) - dpnp_desc = dpnp_descriptor(ext_obj) + dpnp_desc = dpnp_descriptor(ext_obj, orig_desc) if dpnp_desc.is_valid: return dpnp_desc diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py index 36f37f4282ec..92f33bc6310a 100644 --- a/dpnp/dpnp_iface_bitwise.py +++ b/dpnp/dpnp_iface_bitwise.py @@ -62,7 +62,9 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=True, **kwargs): """Choose function to call based on input and call chosen fucntion.""" - if where is not True: + if kwargs: + pass + elif where is not True: pass elif dtype is not None: pass @@ -85,7 +87,7 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T if out is not None: if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None else: out_desc = None @@ -273,7 +275,7 @@ def invert(x, if out is not None: if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None else: out_desc = None return dpnp_invert(x1_desc, out_desc).get_pyobj() diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index a989f745c0a1..2a643fc8469b 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -114,7 +114,7 @@ def dot(x1, x2, out=None, **kwargs): if out is not None: if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None else: out_desc = None return dpnp_dot(x1_desc, x2_desc, out=out_desc).get_pyobj() diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 08de8b2ba5a8..b98e51525d89 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -95,6 +95,41 @@ ] +def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, dtype=None, subok=True, **kwargs): + """Choose function to call based on input and call chosen fucntion.""" + + if kwargs: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + 
alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + if out is not None: + if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): + raise TypeError("return array must be of supported array type") + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None + else: + out_desc = None + + return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() + + return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + + def abs(*args, **kwargs): """ Calculate the absolute value element-wise. @@ -200,7 +235,7 @@ def add(x1, ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. - Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. + Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. @@ -216,29 +251,7 @@ def add(x1, """ - if out is not None: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - - return call_origin(numpy.add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.add, dpnp_add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def around(x1, decimals=0, out=None): @@ -852,7 +865,15 @@ def fmin(*args, **kwargs): return dpnp.minimum(*args, **kwargs) -def fmod(x1, x2, dtype=None, out=None, where=True, **kwargs): +def fmod(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Calculate the element-wise remainder of division. @@ -860,55 +881,30 @@ def fmod(x1, x2, dtype=None, out=None, where=True, **kwargs): Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. - Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. + Parameters `where`, `dtype` and `subok` are supported with their default values. + Keyword argument `kwargs` is currently unsupported. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. See Also -------- - :obj:`dpnp.reminder` : Remainder complementary to floor_divide. 
+ :obj:`dpnp.remainder` : Remainder complementary to floor_divide. :obj:`dpnp.divide` : Standard division. Examples -------- - >>> import dpnp as np - >>> a = np.array([2, -3, 4, 5, -4.5]) - >>> b = np.array([2, 2, 2, 2, 2]) - >>> result = np.fmod(a, b) + >>> import dpnp as dp + >>> a = dp.array([2, -3, 4, 5, -4.5]) + >>> b = dp.array([2, 2, 2, 2, 2]) + >>> result = dp.fmod(a, b) >>> [x for x in result] [0.0, -1.0, 0.0, 1.0, -0.5] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) - - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - return dpnp_fmod(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() - - return call_origin(numpy.fmod, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return _check_nd_call(numpy.fmod, dpnp_fmod, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def gradient(x1, *varargs, **kwargs): @@ -1142,7 +1138,7 @@ def multiply(x1, ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. - Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. + Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. Otherwise the functions will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
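The refactored `_check_nd_call` above gives `add`, `multiply`, `power` and `fmod` a shared path that now honours the `out=` keyword, including output buffers that overlap an input or have a different dtype (a temporary result array is allocated and copied back in those cases). A short, hedged usage sketch of the behaviour the updated docstrings describe; shapes and dtypes below are arbitrary examples:

```python
import dpnp

a = dpnp.arange(10, dtype=dpnp.float32)
b = dpnp.arange(5, 15, dtype=dpnp.float32)

# write the result into a preallocated output array
out = dpnp.empty(10, dtype=dpnp.float32)
dpnp.add(a, b, out=out)

# 'out' may overlap an input: a temporary result is used internally so that
# reads and writes to the shared memory do not race
dpnp.multiply(a[5:], a[:5], out=a[:5])

# the new in-place operators route through the same machinery
a += 1
a *= 2
```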
@@ -1157,29 +1153,7 @@ def multiply(x1, """ - if out is not None: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - - return call_origin(numpy.multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.multiply, dpnp_multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def nancumprod(x1, **kwargs): @@ -1397,34 +1371,7 @@ def power(x1, """ - if where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - if out is not None: - if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): - raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) - else: - out_desc = None - - return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() - - return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.power, dpnp_power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True): diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd index db7127319bb0..0015e8d12c02 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd @@ -116,11 +116,13 @@ cdef class dpnp_descriptor: cdef public: # TODO remove "public" as python accessible attribute object origin_pyobj + dpnp_descriptor origin_desc dict descriptor Py_ssize_t dpnp_descriptor_data_size cpp_bool dpnp_descriptor_is_scalar cdef void * get_data(self) + cdef cpp_bool match_ctype(self, DPNPFuncType ctype) cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except * diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index a94381788764..7a3fb316261d 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -35,6 +35,8 @@ import numpy import dpctl import dpctl.utils as dpu +import dpctl.tensor._copy_utils as dpt_cu +import 
dpctl.tensor._tensor_impl as dpt_ti import dpnp.config as config import dpnp.dpnp_container as dpnp_container @@ -660,9 +662,10 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2): cdef class dpnp_descriptor: - def __init__(self, obj): + def __init__(self, obj, dpnp_descriptor orig_desc=None): """ Initialze variables """ self.origin_pyobj = None + self.origin_desc = None self.descriptor = None self.dpnp_descriptor_data_size = 0 self.dpnp_descriptor_is_scalar = True @@ -681,6 +684,10 @@ cdef class dpnp_descriptor: self.origin_pyobj = obj + """ Keep track of a descriptor with original data """ + if orig_desc is not None and orig_desc.is_valid: + self.origin_desc = orig_desc + """ array size calculation """ cdef Py_ssize_t shape_it = 0 self.dpnp_descriptor_data_size = 1 @@ -740,6 +747,14 @@ cdef class dpnp_descriptor: def is_scalar(self): return self.dpnp_descriptor_is_scalar + @property + def is_temporary(self): + """ + Non-none descriptor of original data means the current descriptor + holds a temporary allocated data. + """ + return self.origin_desc is not None + @property def data(self): if self.is_valid: @@ -771,6 +786,15 @@ cdef class dpnp_descriptor: return interface_dict + def _copy_array_from(self, other_desc): + """ + Fill array data with usm_ndarray of the same shape from other DPNP descriptor + """ + if not isinstance(other_desc, dpnp_descriptor): + raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc))) + + dpt_cu._copy_same_shape(self.get_array(), other_desc.get_array()) + def get_pyobj(self): return self.origin_pyobj @@ -784,6 +808,29 @@ cdef class dpnp_descriptor: "expected either dpctl.tensor.usm_ndarray or dpnp.dpnp_array.dpnp_array, got {}" "".format(type(self.origin_pyobj))) + def get_result_desc(self, result_desc=None): + """ + Copy the result data into an original array + """ + if self.is_temporary: + # Original descriptor is not None, so copy the array data into it and return + from_desc = self if result_desc is None else result_desc + self.origin_desc._copy_array_from(from_desc) + return self.origin_desc + elif result_desc is not None: + # A temporary result descriptor was allocated, needs to copy data back into 'out' descriptor + self._copy_array_from(result_desc) + return self + + def is_array_overlapped(self, other_desc): + """ + Check if usm_ndarray overlaps an array from other DPNP descriptor + """ + if not isinstance(other_desc, dpnp_descriptor): + raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc))) + + return dpt_ti._array_overlap(self.get_array(), other_desc.get_array()) + cdef void * get_data(self): cdef Py_ssize_t item_size = 0 cdef Py_ssize_t elem_offset = 0 @@ -798,6 +845,9 @@ cdef class dpnp_descriptor: return < void * > val + cdef cpp_bool match_ctype(self, DPNPFuncType ctype): + return self.dtype == dpnp_DPNPFuncType_to_dtype(< size_t > ctype) + def __bool__(self): return self.is_valid diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 2e7a5a6d6f3e..08a59798d99b 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -715,19 +715,15 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), 
arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_457_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_459_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_461_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_463_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_465_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_467_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_469_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_537_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_539_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_541_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_545_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_547_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_549_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, 
name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 41128fd70e2d..fb49d8c87495 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -6,10 +6,70 @@ import numpy from numpy.testing import ( - assert_array_equal + assert_, + assert_array_equal, + assert_equal ) +class TestIndexing: + def test_ellipsis_index(self): + a = dpnp.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + assert_(a[...] is not a) + assert_equal(a[...], a) + + # test that slicing with ellipsis doesn't skip an arbitrary number of dimensions + assert_equal(a[0, ...], a[0]) + assert_equal(a[0, ...], a[0,:]) + assert_equal(a[..., 0], a[:, 0]) + + # test that slicing with ellipsis always results in an array + assert_equal(a[0, ..., 1], dpnp.array(2)) + + # assignment with `(Ellipsis,)` on 0-d arrays + b = dpnp.array(1) + b[(Ellipsis,)] = 2 + assert_equal(b, 2) + + def test_boolean_indexing_list(self): + a = dpnp.array([1, 2, 3]) + b = dpnp.array([True, False, True]) + + assert_equal(a[b], [1, 3]) + assert_equal(a[None, b], [[1, 3]]) + + def test_indexing_array_weird_strides(self): + np_x = numpy.ones(10) + dp_x = dpnp.ones(10) + + np_ind = numpy.arange(10)[:, None, None, None] + np_ind = numpy.broadcast_to(np_ind, (10, 55, 4, 4)) + + dp_ind = dpnp.arange(10)[:, None, None, None] + dp_ind = dpnp.broadcast_to(dp_ind, (10, 55, 4, 4)) + + # single advanced index case + assert_array_equal(dp_x[dp_ind], np_x[np_ind]) + + np_x2 = numpy.ones((10, 2)) + dp_x2 = dpnp.ones((10, 2)) + + np_zind = numpy.zeros(4, dtype=np_ind.dtype) + dp_zind = dpnp.zeros(4, dtype=dp_ind.dtype) + + # higher dimensional advanced index + assert_array_equal(dp_x2[dp_ind, dp_zind], np_x2[np_ind, np_zind]) + + def test_indexing_array_negative_strides(self): + arr = dpnp.zeros((4, 4))[::-1, ::-1] + + slices = (slice(None), dpnp.array([0, 1, 2, 3])) + arr[slices] = 10 + assert_array_equal(arr, 10.) 
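The new `TestIndexing` cases above exercise `_get_unwrapped_index_key`, which unwraps dpnp arrays nested inside an index key (including tuples mixing slices and arrays) before handing the key to dpctl's advanced indexing. A brief illustration of the indexing patterns this enables; the values in the comments assume NumPy-compatible semantics:

```python
import dpnp

a = dpnp.arange(12).reshape(3, 4)

# boolean mask used directly as a key
mask = dpnp.array([True, False, True])
rows = a[mask]                        # rows 0 and 2

# integer index array combined with a slice inside a tuple key
idx = dpnp.array([0, 2])
cols = a[:, idx]                      # columns 0 and 2

# assignment through an advanced index, as in test_indexing_array_negative_strides
a[(slice(None), dpnp.array([1, 3]))] = 0
```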
+ + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_choose(): a = numpy.r_[:4] diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index e58e129c03b3..a5449e2cf6f0 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -1,6 +1,7 @@ import pytest from .helper import ( get_all_dtypes, + get_float_complex_dtypes, is_cpu_device, is_win_platform ) @@ -71,11 +72,14 @@ def test_diff(array): @pytest.mark.parametrize("dtype1", get_all_dtypes()) @pytest.mark.parametrize("dtype2", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'divide', 'multiply', 'power', 'subtract']) + ['add', 'divide', 'fmod', 'multiply', 'power', 'subtract']) @pytest.mark.parametrize("data", [[[1, 2], [3, 4]]], ids=['[[1, 2], [3, 4]]']) def test_op_multiple_dtypes(dtype1, func, dtype2, data): + if func == 'fmod' and (dpnp.issubdtype(dtype1, dpnp.complexfloating) or dpnp.issubdtype(dtype2, dpnp.complexfloating)): + pytest.skip("no support of fmod for complex") + np_a = numpy.array(data, dtype=dtype1) dpnp_a = dpnp.array(data, dtype=dtype1) @@ -141,9 +145,18 @@ def test_copysign(self, dtype, lhs, rhs): def test_divide(self, dtype, lhs, rhs): self._test_mathematical('divide', dtype, lhs, rhs) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_fmod(self, dtype, lhs, rhs): + if dtype == dpnp.float32 and rhs == 0.3: + """ + Due to some reason NumPy behaves incorrectly, when: + >>> numpy.fmod(numpy.array([3.9], dtype=numpy.float32), 0.3) + array([0.29999995], dtype=float32) + while dpnp returns something around zero which is expected: + >>> dpnp.fmod(dpnp.array([3.9], dtype=dpnp.float32), 0.3) + array([9.53674318e-08]) + """ + pytest.skip("missaligned with numpy results") self._test_mathematical('fmod', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -216,6 +229,9 @@ def test_op_with_scalar(array, val, func, data_type, val_type): elif is_cpu_device() and data_type == dpnp.complex128: # TODO: discuss the bahavior with OneMKL team pytest.skip("(0j ** 5) is different: (NaN + NaNj) in dpnp and (0j) in numpy") + elif func == 'fmod' and ((data_type is None or not dpnp.issubdtype(data_type, dpnp.floating)) and + (val_type != float or dpnp.issubdtype(data_type, dpnp.complexfloating))): + pytest.skip("dpnp.fmod(a, 0) != 0 for integer a, like it's in numpy") if func == 'subtract' and val_type == bool and data_type == dpnp.bool: with pytest.raises(TypeError): @@ -634,34 +650,232 @@ def test_invalid_shape(self, shape): dpnp.trunc(dp_array, out=dp_out) -class TestPower: +class TestAdd: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_add(self, dtype): + array1_data = numpy.arange(10) + array2_data = numpy.arange(5, 15) + out = numpy.empty(10, dtype=dtype) + + # DPNP + dp_array1 = dpnp.array(array1_data, dtype=dtype) + dp_array2 = dpnp.array(array2_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) + result = dpnp.add(dp_array1, dp_array2, out=dp_out) + + # original + np_array1 = numpy.array(array1_data, dtype=dtype) + np_array2 = numpy.array(array2_data, dtype=dtype) + expected = numpy.add(np_array1, np_array2, out=out) + + assert_allclose(expected, result) + assert_allclose(out, dp_out) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 10 + + np_array1 = numpy.arange(size, 2 * size, dtype=dtype) + np_array2 = numpy.arange(size, dtype=dtype) 
+ np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.add(np_array1, np_array2, out=np_out) - def test_power(self): + dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) + dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.add(dp_array1, dp_array2, out=dp_out) + + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_overlap(self, dtype): + size = 1 if dtype == dpnp.bool else 15 + + np_a = numpy.arange(2 * size, dtype=dtype) + expected = numpy.add(np_a[size::], np_a[::2], out=np_a[:size:]) + + dp_a = dpnp.arange(2 * size, dtype=dtype) + result = dpnp.add(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(expected, result) + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True)) + def test_inplace_strided_out(self, dtype): + size = 21 + + np_a = numpy.arange(size, dtype=dtype) + np_a[::3] += 4 + + dp_a = dpnp.arange(size, dtype=dtype) + dp_a[::3] += 4 + + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("shape", + [(0,), (15, ), (2, 2)], + ids=['(0,)', '(15, )', '(2,2)']) + def test_invalid_shape(self, shape): + dp_array1 = dpnp.arange(10, dtype=dpnp.float64) + dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) + dp_out = dpnp.empty(shape, dtype=dpnp.float64) + + with pytest.raises(ValueError): + dpnp.add(dp_array1, dp_array2, out=dp_out) + + @pytest.mark.parametrize("out", + [4, (), [], (3, 7), [2, 4]], + ids=['4', '()', '[]', '(3, 7)', '[2, 4]']) + def test_invalid_out(self, out): + a = dpnp.arange(10) + + assert_raises(TypeError, dpnp.add, a, 2, out) + assert_raises(TypeError, numpy.add, a.asnumpy(), 2, out) + + +class TestMultiply: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_multiply(self, dtype): array1_data = numpy.arange(10) array2_data = numpy.arange(5, 15) - out = numpy.empty(10, dtype=numpy.float64) + out = numpy.empty(10, dtype=dtype) # DPNP - dp_array1 = dpnp.array(array1_data, dtype=dpnp.float64) - dp_array2 = dpnp.array(array2_data, dtype=dpnp.float64) - dp_out = dpnp.array(out, dtype=dpnp.float64) + dp_array1 = dpnp.array(array1_data, dtype=dtype) + dp_array2 = dpnp.array(array2_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) + result = dpnp.multiply(dp_array1, dp_array2, out=dp_out) + + # original + np_array1 = numpy.array(array1_data, dtype=dtype) + np_array2 = numpy.array(array2_data, dtype=dtype) + expected = numpy.multiply(np_array1, np_array2, out=out) + + assert_allclose(expected, result) + assert_allclose(out, dp_out) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 10 + + np_array1 = numpy.arange(size, 2 * size, dtype=dtype) + np_array2 = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.multiply(np_array1, np_array2, out=np_out) + + dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) + dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.multiply(dp_array1, dp_array2, out=dp_out) + + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_overlap(self, dtype): + size = 1 if dtype == dpnp.bool else 15 + + np_a = numpy.arange(2 * size, dtype=dtype) + expected = numpy.multiply(np_a[size::], np_a[::2], out=np_a[:size:]) + + dp_a = dpnp.arange(2 * size, 
dtype=dtype) + result = dpnp.multiply(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(expected, result) + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True)) + def test_inplace_strided_out(self, dtype): + size = 21 + + np_a = numpy.arange(size, dtype=dtype) + np_a[::3] *= 4 + + dp_a = dpnp.arange(size, dtype=dtype) + dp_a[::3] *= 4 + + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("shape", + [(0,), (15, ), (2, 2)], + ids=['(0,)', '(15, )', '(2,2)']) + def test_invalid_shape(self, shape): + dp_array1 = dpnp.arange(10, dtype=dpnp.float64) + dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) + dp_out = dpnp.empty(shape, dtype=dpnp.float64) + + with pytest.raises(ValueError): + dpnp.multiply(dp_array1, dp_array2, out=dp_out) + + @pytest.mark.parametrize("out", + [4, (), [], (3, 7), [2, 4]], + ids=['4', '()', '[]', '(3, 7)', '[2, 4]']) + def test_invalid_out(self, out): + a = dpnp.arange(10) + + assert_raises(TypeError, dpnp.multiply, a, 2, out) + assert_raises(TypeError, numpy.multiply, a.asnumpy(), 2, out) + + +class TestPower: + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_power(self, dtype): + array1_data = numpy.arange(10) + array2_data = numpy.arange(5, 15) + out = numpy.empty(10, dtype=dtype) + + # DPNP + dp_array1 = dpnp.array(array1_data, dtype=dtype) + dp_array2 = dpnp.array(array2_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) result = dpnp.power(dp_array1, dp_array2, out=dp_out) # original - np_array1 = numpy.array(array1_data, dtype=numpy.float64) - np_array2 = numpy.array(array2_data, dtype=numpy.float64) + np_array1 = numpy.array(array1_data, dtype=dtype) + np_array2 = numpy.array(array2_data, dtype=dtype) expected = numpy.power(np_array1, np_array2, out=out) - assert_array_equal(expected, result) + assert_allclose(expected, result) @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_none=True)) - def test_invalid_dtype(self, dtype): - dp_array1 = dpnp.arange(10, dtype=dpnp.complex64) - dp_array2 = dpnp.arange(5, 15, dtype=dpnp.complex64) - dp_out = dpnp.empty(10, dtype=dtype) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 5 - with pytest.raises(ValueError): - dpnp.power(dp_array1, dp_array2, out=dp_out) + np_array1 = numpy.arange(size, 2 * size, dtype=dtype) + np_array2 = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.power(np_array1, np_array2, out=np_out) + + dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) + dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.power(dp_array1, dp_array2, out=dp_out) + + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) + def test_out_overlap(self, dtype): + size = 5 + + np_a = numpy.arange(2 * size, dtype=dtype) + expected = numpy.power(np_a[size::], np_a[::2], out=np_a[:size:]) + + dp_a = dpnp.arange(2 * size, dtype=dtype) + result = dpnp.power(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(expected, result) + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) + def test_inplace_strided_out(self, dtype): + size = 5 + + np_a = numpy.arange(2 * size, dtype=dtype) + np_a[::3] **= 3 + + dp_a = dpnp.arange(2 * size, dtype=dtype) + dp_a[::3] **= 3 + + assert_allclose(dp_a, np_a) 
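The `test_inplace_strided_out` cases rely on the newly added `__iadd__`/`__imul__` operators together with the strided `out=` support: an in-place update through a strided view must be written back into the original buffer just as NumPy does. A compact sketch of the check being made; the element-wise agreement with NumPy is the expected outcome, not a guarantee of bit-identical floating-point results:

```python
import numpy
import dpnp

np_a = numpy.arange(12, dtype=numpy.float32)
dp_a = dpnp.arange(12, dtype=dpnp.float32)

# update every third element in place through a strided view
np_a[::3] *= 4
dp_a[::3] *= 4

# dpnp is expected to reproduce the NumPy result element-wise
numpy.testing.assert_allclose(dp_a.asnumpy(), np_a)
```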
@pytest.mark.parametrize("shape", [(0,), (15, ), (2, 2)], diff --git a/tests/test_strides.py b/tests/test_strides.py index e56e9befeee4..849e7313f643 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -203,14 +203,113 @@ def test_strides_fmod(dtype, shape): @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) -def test_strides_true_devide(dtype, shape): +def test_strides_true_divide(dtype, shape): a = numpy.arange(numpy.prod(shape), dtype=dtype).reshape(shape) b = a.T + 1 dpa = dpnp.reshape(dpnp.arange(numpy.prod(shape), dtype=dtype), shape) dpb = dpa.T + 1 - result = dpnp.fmod(dpa, dpb) - expected = numpy.fmod(a, b) + result = dpnp.true_divide(dpa, dpb) + expected = numpy.true_divide(a, b) assert_allclose(result, expected) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strided_out_2args(func_name, dtype): + np_out = numpy.ones((5, 3, 2))[::3] + np_a = numpy.arange(numpy.prod(np_out.shape), dtype=dtype).reshape(np_out.shape) + np_b = numpy.full(np_out.shape, fill_value=0.7, dtype=dtype) + + dp_out = dpnp.ones((5, 3, 2))[::3] + dp_a = dpnp.array(np_a) + dp_b = dpnp.array(np_b) + + np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out) + dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out) + + assert_allclose(dp_res.asnumpy(), np_res) + assert_allclose(dp_out.asnumpy(), np_out) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strided_in_out_2args(func_name, dtype): + sh = (3, 4, 2) + prod = numpy.prod(sh) + + np_out = numpy.ones(sh, dtype=dtype)[::2] + np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2] + np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2].T + + dp_out = dpnp.ones(sh, dtype=dtype)[::2] + dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2] + dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2].T + + np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out) + dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strided_in_out_2args_diff_out_dtype(func_name, dtype): + sh = (3, 3, 2) + prod = numpy.prod(sh) + + np_out = numpy.ones(sh, dtype=numpy.complex64)[::2] + np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2].T + np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2] + + dp_out = dpnp.ones(sh, dtype=dpnp.complex64)[::2] + dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2].T + dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2] + + np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out) + dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +def test_strided_in_2args_overlap(func_name, dtype): + size = 5 + + np_a = numpy.arange(2 * size, dtype=dtype) + dp_a = dpnp.arange(2 * size, dtype=dtype) + + np_res = _getattr(numpy, func_name)(np_a[size::], np_a[::2], out=np_a[:size:]) + dp_res = _getattr(dpnp, 
func_name)(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +def test_strided_in_out_2args_overlap(func_name, dtype): + sh = (4, 3, 2) + prod = numpy.prod(sh) + + np_a = numpy.arange(prod, dtype=dtype).reshape(sh) + np_b = numpy.full(np_a[::2].shape, fill_value=0.7, dtype=dtype) + + dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh) + dp_b = dpnp.full(dp_a[::2].shape, fill_value=0.7, dtype=dtype) + + np_res = _getattr(numpy, func_name)(np_a[::2], np_b, out=np_a[1::2]) + dp_res = _getattr(dpnp, func_name)(dp_a[::2], dp_b, out=dp_a[1::2]) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index a523c46465bf..ab974e426f93 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -945,3 +945,15 @@ def test_broadcast_to(device): x = dpnp.arange(5, device=device) y = dpnp.broadcast_to(x, (3, 5)) assert_sycl_queue_equal(x.sycl_queue, y.sycl_queue) + + +@pytest.mark.parametrize("device_x", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +@pytest.mark.parametrize("device_y", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_asarray(device_x, device_y): + x = dpnp.array([1, 2, 3], device=device_x) + y = dpnp.asarray([x, x, x], device=device_y) + assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue) diff --git a/tests/test_umath.py b/tests/test_umath.py index 6122b253ca37..3a1f4467dcea 100644 --- a/tests/test_umath.py +++ b/tests/test_umath.py @@ -1,6 +1,14 @@ import pytest +from .helper import ( + get_all_dtypes +) import numpy +from numpy.testing import ( + assert_allclose, + assert_array_equal +) + import dpnp # full list of umaths @@ -71,7 +79,7 @@ def test_umaths(test_cases): # DPNP result = getattr(dpnp, umath)(*iargs) - numpy.testing.assert_allclose(result, expected, rtol=1e-6) + assert_allclose(result, expected, rtol=1e-6) class TestSin: @@ -89,7 +97,7 @@ def test_sin_ordinary(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.sin(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -129,7 +137,7 @@ def test_cos(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.cos(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -169,7 +177,7 @@ def test_log(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.log(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -209,7 +217,7 @@ def test_exp(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.exp(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -249,7 +257,7 @@ def test_arcsin(self): np_array = 
numpy.array(array_data, dtype=numpy.float64) expected = numpy.arcsin(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -289,7 +297,7 @@ def test_arctan(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.arctan(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -329,7 +337,7 @@ def test_tan(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.tan(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -355,7 +363,6 @@ def test_invalid_shape(self, shape): class TestArctan2: - def test_arctan2(self): array_data = numpy.arange(10) out = numpy.empty(10, dtype=numpy.float64) @@ -369,18 +376,21 @@ def test_arctan2(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.arctan2(np_array, np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) - @pytest.mark.parametrize("dtype", - [numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float32', 'numpy.int64', 'numpy.int32']) - def test_invalid_dtype(self, dtype): + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 10 - dp_array = dpnp.arange(10, dtype=dpnp.float64) - dp_out = dpnp.empty(10, dtype=dtype) + np_array = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.arctan2(np_array, np_array, out=np_out) - with pytest.raises(ValueError): - dpnp.arctan2(dp_array, dp_array, out=dp_out) + dp_array = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.arctan2(dp_array, dp_array, out=dp_out) + + assert_allclose(expected, result) @pytest.mark.parametrize("shape", [(0,), (15, ), (2, 2)], diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 817bdee66a57..06c7f2b5cb24 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -18,6 +18,8 @@ def test_coerced_usm_types_sum(usm_type_x, usm_type_y): y = dp.arange(1000, usm_type = usm_type_y) z = 1.3 + x + y + 2 + z += x + z += 7.4 assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y @@ -31,6 +33,8 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): y = dp.arange(10, usm_type = usm_type_y) z = 3 * x * y * 1.5 + z *= x + z *= 4.8 assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y @@ -180,6 +184,9 @@ def test_meshgrid(usm_type_x, usm_type_y): pytest.param("dot", [[0., 1., 2.], [3., 4., 5.]], [[4., 4.], [4., 4.], [4., 4.]]), + pytest.param("fmod", + [-3., -2., -1., 1., 2., 3.], + [2., 2., 2., 2., 2., 2.]), ], ) @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 39dc3e10f721..3150e4da99e0 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -145,6 +145,14 @@ def check_binary(self, xp): if dtype1 in (numpy.float16, numpy.float32): y = y.astype(numpy.complex64) + if xp is cupy and 
not xp.isscalar(y) and not self.use_dtype: + if self.name == 'fmod': + # TODO: Fix this: fmod(a, 0) + # numpy => 0 + # cupy => 2147483647 + if not (dtype1 in float_types or dtype2 in float_types) and (np2 == 0).any(): + y[xp.broadcast_to(xp.array(arg2), y.shape) == 0] = 0 + # NumPy returns an output array of another type than DPNP when input ones have diffrent types. if xp is cupy and dtype1 != dtype2 and not self.use_dtype: is_array_arg1 = not xp.isscalar(arg1) @@ -153,7 +161,7 @@ def check_binary(self, xp): is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) - if self.name in ('add', 'multiply', 'power', 'subtract'): + if self.name in ('add', 'fmod', 'multiply', 'power', 'subtract'): if is_array_arg1 and is_array_arg2: # If both inputs are arrays where one is of floating type and another - integer, # NumPy will return an output array of always "float64" type,
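The masking applied above works around two known divergences for integer `fmod`: NumPy defines `fmod(x, 0)` as 0 (emitting a runtime warning) while the device-side integer `%` kernel gives no meaningful value there, and NumPy promotes mixed-kind array arguments to `float64`. A plain-NumPy sketch of the zero-divisor masking, assuming the same test arrays that appear in the skip list above:

```python
import numpy

arg1 = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=numpy.int32)
arg2 = numpy.array([[0, 1, 2], [3, 4, 5]], dtype=numpy.int32)

with numpy.errstate(divide='ignore'):
    y = numpy.fmod(arg1, arg2)        # NumPy yields 0 where the divisor is 0

# mirror of the test adjustment: zero out positions with a zero divisor before
# comparing, since the integer '%' result is unspecified there
y[numpy.broadcast_to(arg2, y.shape) == 0] = 0
```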