diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
index f3d8a4a95ccd..6264d5d7146d 100644
--- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
@@ -281,8 +281,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
                 const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
                     const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
-                    const shape_elem_type* input2_strides_data = &dev_strides_data[2];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
                                                                                                                        \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index 741a945fb099..0f691a03ab60 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -111,7 +111,7 @@
                 size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                        \
                 {                                                                                                      \
                     const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
                                                                                                                        \
                     size_t input_id = 0;                                                                               \
                     for (size_t i = 0; i < input1_ndim; ++i)                                                           \
@@ -635,7 +635,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
                 size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                        \
                 {                                                                                                      \
                     const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
                                                                                                                        \
                     size_t input_id = 0;                                                                               \
                     for (size_t i = 0; i < input1_ndim; ++i)                                                           \
@@ -995,8 +995,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                 const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
                     const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
-                    const shape_elem_type* input2_strides_data = &dev_strides_data[2];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
                                                                                                                        \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
index d1a6767c2adc..78a9a29e99e7 100644
--- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
@@ -396,7 +396,7 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
                 const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
                     const shape_elem_type *result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type *input1_strides_data = &dev_strides_data[1];                                 \
+                    const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim];                       \
                                                                                                                        \
                     size_t input1_id = 0;                                                                              \
                                                                                                                        \
@@ -635,8 +635,8 @@ static void func_map_logic_1arg_1type_helper(func_map_t& fmap)
                 const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
                     const shape_elem_type *result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type *input1_strides_data = &dev_strides_data[1];                                 \
-                    const shape_elem_type *input2_strides_data = &dev_strides_data[2];                                 \
+                    const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
                                                                                                                        \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
diff --git a/dpnp/backend/kernels/dpnp_krnl_searching.cpp b/dpnp/backend/kernels/dpnp_krnl_searching.cpp
index fef5f78d15da..471d524643f5 100644
--- a/dpnp/backend/kernels/dpnp_krnl_searching.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_searching.cpp
@@ -294,9 +294,9 @@ DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref,
             const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
             {
                 const shape_elem_type* result_strides_data = &dev_strides_data[0];
-                const shape_elem_type* condition_strides_data = &dev_strides_data[1];
-                const shape_elem_type* input1_strides_data = &dev_strides_data[2];
-                const shape_elem_type* input2_strides_data = &dev_strides_data[3];
+                const shape_elem_type* condition_strides_data = &dev_strides_data[result_ndim];
+                const shape_elem_type* input1_strides_data = &dev_strides_data[2 * result_ndim];
+                const shape_elem_type* input2_strides_data = &dev_strides_data[3 * result_ndim];
 
                 size_t condition_id = 0;
                 size_t input1_id = 0;
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 2fa9de34b998..923454142f43 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -505,8 +505,23 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
         return_type = kernel_data.return_type_no_fp64
         func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64
 
-    if out is None:
-        """ Create result array with type given by FPTR data """
+    # check 'out' parameter data
+    if out is not None:
+        if out.shape != result_shape:
+            utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)
+
+        utils.get_common_usm_allocation(x1_obj, out)  # check USM allocation is common
+
+    if out is None or out.is_array_overlapped(x1_obj) or out.is_array_overlapped(x2_obj) or not out.match_ctype(return_type):
+        """
+        Create result array with type given by FPTR data.
+        If 'out' array has a different dtype than expected or overlaps memory of any input array,
+        we have to create a temporary array and copy data from the temporary into the 'out' array
+        once the computation is completed.
+        Otherwise, simultaneous access to the same memory may cause a race condition,
+        which will result in undefined behaviour.
+        """
+        is_result_memory_allocated = True
         result = utils.create_output_descriptor(result_shape,
                                                 return_type,
                                                 None,
@@ -514,16 +529,9 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
                                                 usm_type=result_usm_type,
                                                 sycl_queue=result_sycl_queue)
     else:
-        result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type)
-        if out.dtype != result_type:
-            utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
-        if out.shape != result_shape:
-            utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)
-
+        is_result_memory_allocated = False
         result = out
 
-        utils.get_common_usm_allocation(x1_obj, result)  # check USM allocation is common
-
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
 
     result_obj = result.get_array()
@@ -554,4 +562,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
     with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
-    return result
+    if out is not None and is_result_memory_allocated:
+        return out.get_result_desc(result)
+
+    return result.get_result_desc()
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 9bf456060ddd..6a5bcf239df2 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -272,6 +272,10 @@ def get_dpnp_descriptor(ext_obj,
     if use_origin_backend():
         return False
 
+    # It's required to keep track of the input object if a non-strided copy is going to be created.
+    # Thus an extra descriptor will be allocated to refer to the original input.
+    orig_desc = None
+
     # If input object is a scalar, it means it was allocated on host memory.
     # We need to copy it to USM memory according to compute follows data paradigm.
     if isscalar(ext_obj):
@@ -291,6 +295,7 @@ def get_dpnp_descriptor(ext_obj,
             ext_obj_offset = 0
 
         if ext_obj.strides != shape_offsets or ext_obj_offset != 0:
+            orig_desc = dpnp_descriptor(ext_obj)
             ext_obj = array(ext_obj)
 
     # while dpnp functions are based on DPNP_QUEUE
@@ -304,7 +309,7 @@ def get_dpnp_descriptor(ext_obj,
         if not queue_is_default:
             ext_obj = array(ext_obj, sycl_queue=default_queue)
 
-    dpnp_desc = dpnp_descriptor(ext_obj)
+    dpnp_desc = dpnp_descriptor(ext_obj, orig_desc)
     if dpnp_desc.is_valid:
         return dpnp_desc
 
diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py
index 36f37f4282ec..92f33bc6310a 100644
--- a/dpnp/dpnp_iface_bitwise.py
+++ b/dpnp/dpnp_iface_bitwise.py
@@ -62,7 +62,9 @@
 def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=True, **kwargs):
     """Choose function to call based on input and call chosen fucntion."""
 
-    if where is not True:
+    if kwargs:
+        pass
+    elif where is not True:
         pass
     elif dtype is not None:
         pass
@@ -85,7 +87,7 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T
             if out is not None:
                 if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
                     raise TypeError("return array must be of supported array type")
-                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
             else:
                 out_desc = None
 
@@ -273,7 +275,7 @@ def invert(x,
             if out is not None:
                 if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
                     raise TypeError("return array must be of supported array type")
-                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
             else:
                 out_desc = None
         return dpnp_invert(x1_desc, out_desc).get_pyobj()
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index a989f745c0a1..2a643fc8469b 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -114,7 +114,7 @@ def dot(x1, x2, out=None, **kwargs):
             if out is not None:
                 if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
                     raise TypeError("return array must be of supported array type")
-                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
             else:
                 out_desc = None
             return dpnp_dot(x1_desc, x2_desc, out=out_desc).get_pyobj()
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 08de8b2ba5a8..a001b055a280 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -95,6 +95,41 @@
 ]
 
 
+def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, dtype=None, subok=True, **kwargs):
+    """Choose function to call based on input and call chosen function."""
+    # Each `pass` branch below skips the dpnp path and ends up in the `origin_func` fallback at the bottom.
+    if kwargs:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
+            else:
+                out_desc = None
+
+            return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
+
+    # forward every argument, including `subok`, so the fallback honours what the caller requested
+    return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, subok=subok, **kwargs)
+
+
 def abs(*args, **kwargs):
     """
     Calculate the absolute value element-wise.
@@ -1397,34 +1432,7 @@ def power(x1,
 
     """
 
-    if where is not True:
-        pass
-    elif dtype is not None:
-        pass
-    elif subok is not True:
-        pass
-    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
-        # at least either x1 or x2 has to be an array
-        pass
-    else:
-        # get USM type and queue to copy scalar from the host memory into a USM allocation
-        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
-
-        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        if x1_desc and x2_desc:
-            if out is not None:
-                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
-                    raise TypeError("return array must be of supported array type")
-                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
-            else:
-                out_desc = None
-
-            return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
-
-    return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
+    return _check_nd_call(numpy.power, dpnp_power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
index db7127319bb0..0015e8d12c02 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
@@ -116,11 +116,13 @@ cdef class dpnp_descriptor:
 
     cdef public:  # TODO remove "public" as python accessible attribute
         object origin_pyobj
+        dpnp_descriptor origin_desc
         dict descriptor
         Py_ssize_t dpnp_descriptor_data_size
         cpp_bool dpnp_descriptor_is_scalar
 
     cdef void * get_data(self)
+    cdef cpp_bool match_ctype(self, DPNPFuncType ctype)
 
 
 cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
index a94381788764..7a3fb316261d 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
@@ -35,6 +35,8 @@ import numpy
 
 import dpctl
 import dpctl.utils as dpu
+import dpctl.tensor._copy_utils as dpt_cu
+import dpctl.tensor._tensor_impl as dpt_ti
 
 import dpnp.config as config
 import dpnp.dpnp_container as dpnp_container
@@ -660,9 +662,10 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2):
 
 
 cdef class dpnp_descriptor:
-    def __init__(self, obj):
+    def __init__(self, obj, dpnp_descriptor orig_desc=None):
         """ Initialze variables """
         self.origin_pyobj = None
+        self.origin_desc = None
         self.descriptor = None
         self.dpnp_descriptor_data_size = 0
         self.dpnp_descriptor_is_scalar = True
@@ -681,6 +684,10 @@ cdef class dpnp_descriptor:
 
         self.origin_pyobj = obj
 
+        """ Keep track of a descriptor with original data """
+        if orig_desc is not None and orig_desc.is_valid:
+            self.origin_desc = orig_desc
+
         """ array size calculation """
         cdef Py_ssize_t shape_it = 0
         self.dpnp_descriptor_data_size = 1
@@ -740,6 +747,14 @@ cdef class dpnp_descriptor:
     def is_scalar(self):
         return self.dpnp_descriptor_is_scalar
 
+    @property
+    def is_temporary(self):
+        """
+        A non-None descriptor of the original data means that the current descriptor
+        holds temporarily allocated data.
+        """
+        return self.origin_desc is not None
+
     @property
     def data(self):
         if self.is_valid:
@@ -771,6 +786,15 @@ cdef class dpnp_descriptor:
 
         return interface_dict
 
+    def _copy_array_from(self, other_desc):
+        """
+        Fill this descriptor's array with the usm_ndarray of the same shape held by another DPNP descriptor.
+        """
+        if not isinstance(other_desc, dpnp_descriptor):
+            raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc)))
+
+        dpt_cu._copy_same_shape(self.get_array(), other_desc.get_array())
+
     def get_pyobj(self):
         return self.origin_pyobj
 
@@ -784,6 +808,29 @@ cdef class dpnp_descriptor:
             "expected either dpctl.tensor.usm_ndarray or dpnp.dpnp_array.dpnp_array, got {}"
             "".format(type(self.origin_pyobj)))
 
+    def get_result_desc(self, result_desc=None):
+        """
+        Copy the result data into the original array when needed and return the descriptor that holds the result.
+        """
+        if self.is_temporary:
+            # Original descriptor is not None, so copy the array data into it and return it
+            from_desc = self if result_desc is None else result_desc
+            self.origin_desc._copy_array_from(from_desc)
+            return self.origin_desc
+        elif result_desc is not None:
+            # A temporary result descriptor was passed in, so its data has to be copied back into this descriptor
+            self._copy_array_from(result_desc)
+        return self
+
+    def is_array_overlapped(self, other_desc):
+        """
+        Check whether this descriptor's usm_ndarray overlaps the array of another DPNP descriptor.
+        """
+        if not isinstance(other_desc, dpnp_descriptor):
+            raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc)))
+
+        return dpt_ti._array_overlap(self.get_array(), other_desc.get_array())
+
     cdef void * get_data(self):
         cdef Py_ssize_t item_size = 0
         cdef Py_ssize_t elem_offset = 0
@@ -798,6 +845,9 @@ cdef class dpnp_descriptor:
 
         return < void * > val
 
+    cdef cpp_bool match_ctype(self, DPNPFuncType ctype):
+        return self.dtype == dpnp_DPNPFuncType_to_dtype(< size_t > ctype)  # True when own dtype equals the dtype mapped from ctype
+
     def __bool__(self):
         return self.is_valid
 
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index e58e129c03b3..4b3b5d07f941 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -635,7 +635,6 @@ def test_invalid_shape(self, shape):
 
 
 class TestPower:
-
     def test_power(self):
         array1_data = numpy.arange(10)
         array2_data = numpy.arange(5, 15)
@@ -655,13 +654,45 @@ def test_power(self):
         assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_none=True))
-    def test_invalid_dtype(self, dtype):
-        dp_array1 = dpnp.arange(10, dtype=dpnp.complex64)
-        dp_array2 = dpnp.arange(5, 15, dtype=dpnp.complex64)
-        dp_out = dpnp.empty(10, dtype=dtype)
+    def test_out_dtypes(self, dtype):
+        size = 2 if dtype == dpnp.bool else 5
 
-        with pytest.raises(ValueError):
-            dpnp.power(dp_array1, dp_array2, out=dp_out)
+        np_array1 = numpy.arange(size, 2 * size, dtype=dtype)
+        np_array2 = numpy.arange(size, dtype=dtype)
+        np_out = numpy.empty(size, dtype=numpy.complex64)
+        expected = numpy.power(np_array1, np_array2, out=np_out)
+
+        dp_array1 = dpnp.arange(size, 2*size, dtype=dtype)
+        dp_array2 = dpnp.arange(size, dtype=dtype)
+        dp_out = dpnp.empty(size, dtype=dpnp.complex64)
+        result = dpnp.power(dp_array1, dp_array2, out=dp_out)
+
+        assert_array_equal(expected, result)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+    def test_out_overlap(self, dtype):
+        size = 5
+
+        np_a = numpy.arange(2 * size, dtype=dtype)
+        expected = numpy.power(np_a[size::], np_a[::2], out=np_a[:size:])  # 'out' shares elements with the second operand
+
+        dp_a = dpnp.arange(2 * size, dtype=dtype)
+        result = dpnp.power(dp_a[size::], dp_a[::2], out=dp_a[:size:])  # same overlapping views on the dpnp side
+
+        assert_allclose(expected, result)
+        assert_allclose(dp_a, np_a)  # the whole underlying buffer must match, not only the returned view
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+    def test_inplace_strided_out(self, dtype):
+        size = 5
+
+        np_a = numpy.arange(2 * size, dtype=dtype)
+        np_a[::3] **= 3  # in-place power applied to every third element only
+
+        dp_a = dpnp.arange(2 * size, dtype=dtype)
+        dp_a[::3] **= 3  # same in-place update through a strided dpnp view
+
+        assert_allclose(dp_a, np_a)
 
     @pytest.mark.parametrize("shape",
                              [(0,), (15, ), (2, 2)],
diff --git a/tests/test_strides.py b/tests/test_strides.py
index e56e9befeee4..10bd575bf6a9 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -214,3 +214,102 @@ def test_strides_true_devide(dtype, shape):
     expected = numpy.fmod(a, b)
 
     assert_allclose(result, expected)
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strided_out_2args(func_name, dtype):
+    np_out = numpy.ones((5, 3, 2))[::3]  # strided view: every third slice along the first axis
+    np_a = numpy.arange(numpy.prod(np_out.shape), dtype=dtype).reshape(np_out.shape)
+    np_b = numpy.full(np_out.shape, fill_value=0.7, dtype=dtype)
+
+    dp_out = dpnp.ones((5, 3, 2))[::3]  # same strided layout for the dpnp 'out' array
+    dp_a = dpnp.array(np_a)
+    dp_b = dpnp.array(np_b)
+
+    np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out)
+    dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out)
+
+    assert_allclose(dp_res.asnumpy(), np_res)  # returned array matches numpy
+    assert_allclose(dp_out.asnumpy(), np_out)  # data written through 'out' matches numpy
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strided_in_out_2args(func_name, dtype):
+    sh = (3, 4, 2)
+    prod = numpy.prod(sh)
+
+    np_out = numpy.ones(sh, dtype=dtype)[::2]  # strided output view
+    np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2]  # strided first input
+    np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2].T  # strided and transposed second input
+
+    dp_out = dpnp.ones(sh, dtype=dtype)[::2]
+    dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2]
+    dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2].T
+
+    np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out)
+    dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out)
+
+    assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06)  # returned array matches numpy
+    assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06)  # data written through 'out' matches numpy
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strided_in_out_2args_diff_out_dtype(func_name, dtype):
+    sh = (3, 3, 2)
+    prod = numpy.prod(sh)
+
+    np_out = numpy.ones(sh, dtype=numpy.complex64)[::2]  # strided 'out' with complex64 dtype, unlike the non-complex inputs
+    np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2].T  # strided and transposed first input
+    np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2]  # strided second input
+
+    dp_out = dpnp.ones(sh, dtype=dpnp.complex64)[::2]
+    dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2].T
+    dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2]
+
+    np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out)
+    dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out)
+
+    assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06)  # returned array matches numpy
+    assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06)  # data written through 'out' matches numpy
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+def test_strided_in_2args_overlap(func_name, dtype):
+    size = 5
+
+    np_a = numpy.arange(2 * size, dtype=dtype)
+    dp_a = dpnp.arange(2 * size, dtype=dtype)
+
+    np_res = _getattr(numpy, func_name)(np_a[size::], np_a[::2], out=np_a[:size:])  # 'out' shares elements with the strided second input
+    dp_res = _getattr(dpnp, func_name)(dp_a[size::], dp_a[::2], out=dp_a[:size:])  # same overlapping views on the dpnp side
+
+    assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06)
+    assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06)  # the whole underlying buffer must match
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+def test_strided_in_out_2args_overlap(func_name, dtype):
+    sh = (4, 3, 2)
+    prod = numpy.prod(sh)
+
+    np_a = numpy.arange(prod, dtype=dtype).reshape(sh)
+    np_b = numpy.full(np_a[::2].shape, fill_value=0.7, dtype=dtype)
+
+    dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)
+    dp_b = dpnp.full(dp_a[::2].shape, fill_value=0.7, dtype=dtype)
+
+    np_res = _getattr(numpy, func_name)(np_a[::2], np_b, out=np_a[1::2])  # input and 'out' are interleaved views (even vs. odd slices) of one array
+    dp_res = _getattr(dpnp, func_name)(dp_a[::2], dp_b, out=dp_a[1::2])  # same interleaved views on the dpnp side
+
+    assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06)
+    assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06)  # the whole underlying buffer must match
diff --git a/tests/test_umath.py b/tests/test_umath.py
index 6122b253ca37..3a1f4467dcea 100644
--- a/tests/test_umath.py
+++ b/tests/test_umath.py
@@ -1,6 +1,14 @@
 import pytest
+from .helper import (
+    get_all_dtypes
+)
 
 import numpy
+from numpy.testing import (
+    assert_allclose,
+    assert_array_equal
+)
+
 import dpnp
 
 # full list of umaths
@@ -71,7 +79,7 @@ def test_umaths(test_cases):
     # DPNP
     result = getattr(dpnp, umath)(*iargs)
 
-    numpy.testing.assert_allclose(result, expected, rtol=1e-6)
+    assert_allclose(result, expected, rtol=1e-6)
 
 
 class TestSin:
@@ -89,7 +97,7 @@ def test_sin_ordinary(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.sin(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -129,7 +137,7 @@ def test_cos(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.cos(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -169,7 +177,7 @@ def test_log(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.log(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -209,7 +217,7 @@ def test_exp(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.exp(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -249,7 +257,7 @@ def test_arcsin(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.arcsin(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -289,7 +297,7 @@ def test_arctan(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.arctan(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -329,7 +337,7 @@ def test_tan(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.tan(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -355,7 +363,6 @@ def test_invalid_shape(self, shape):
 
 
 class TestArctan2:
-
     def test_arctan2(self):
         array_data = numpy.arange(10)
         out = numpy.empty(10, dtype=numpy.float64)
@@ -369,18 +376,21 @@ def test_arctan2(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.arctan2(np_array, np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
-    @pytest.mark.parametrize("dtype",
-                             [numpy.float32, numpy.int64, numpy.int32],
-                             ids=['numpy.float32', 'numpy.int64', 'numpy.int32'])
-    def test_invalid_dtype(self, dtype):
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+    def test_out_dtypes(self, dtype):
+        size = 2 if dtype == dpnp.bool else 10
 
-        dp_array = dpnp.arange(10, dtype=dpnp.float64)
-        dp_out = dpnp.empty(10, dtype=dtype)
+        np_array = numpy.arange(size, dtype=dtype)
+        np_out = numpy.empty(size, dtype=numpy.complex64)
+        expected = numpy.arctan2(np_array, np_array, out=np_out)
 
-        with pytest.raises(ValueError):
-            dpnp.arctan2(dp_array, dp_array, out=dp_out)
+        dp_array = dpnp.arange(size, dtype=dtype)
+        dp_out = dpnp.empty(size, dtype=dpnp.complex64)
+        result = dpnp.arctan2(dp_array, dp_array, out=dp_out)
+
+        assert_allclose(expected, result)
 
     @pytest.mark.parametrize("shape",
                              [(0,), (15, ), (2, 2)],