diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 2f9a94db6914..05c216042098 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -196,8 +196,6 @@ jobs: run: | python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} - env: - SYCL_QUEUE_THREAD_POOL_SIZE: 6 test_windows: name: Test ['windows-latest', python='${{ matrix.python }}'] @@ -335,8 +333,6 @@ jobs: run: | python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} - env: - SYCL_QUEUE_THREAD_POOL_SIZE: 6 upload: name: Upload ['${{ matrix.os }}', python='${{ matrix.python }}'] diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index d4158d212d43..413835d336cf 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -66,7 +66,6 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_PARALLEL: true - SYCL_QUEUE_THREAD_POOL_SIZE: 6 coveralls: name: Indicate completion to coveralls.io diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000000..7af00de7ce89 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,55 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.12.0] - 2023-06-15 + +### Added + +* Implemented `dpnp.broadcast_to` function [#1333](https://github.com/IntelPython/dpnp/pull/1333) +* Implemented `dpnp.extract` function [#1340](https://github.com/IntelPython/dpnp/pull/1340) +* Implemented `dpnp.linalg.eigh` function through pybind11 extension of OneMKL call [#1383](https://github.com/IntelPython/dpnp/pull/1383) +* Implemented `dpnp.mean` function [#1431](https://github.com/IntelPython/dpnp/pull/1431) +* Added support of bool types in bitwise operations [#1334](https://github.com/IntelPython/dpnp/pull/1334) +* Added `out` parameter in `dpnp.add` function [#1329](https://github.com/IntelPython/dpnp/pull/1329) +* Added `out` parameter in `dpnp.multiply` function [#1365](https://github.com/IntelPython/dpnp/pull/1365) +* Added `out` parameter in `dpnp.sqrt` function [#1332](https://github.com/IntelPython/dpnp/pull/1332) +* Added `rowvar` parameter in `dpnp.cov` function [#1371](https://github.com/IntelPython/dpnp/pull/1371) +* Added `nbytes` property to dpnp array [#1359](https://github.com/IntelPython/dpnp/pull/1359) +* Introduced a new GitHub Action to control code coverage [#1373](https://github.com/IntelPython/dpnp/pull/1373) +* Added change log [#1439](https://github.com/IntelPython/dpnp/pull/1439) + + +### Changed + +* Leveraged `dpctl.tensor` implementation for `dpnp.place` function [#1337](https://github.com/IntelPython/dpnp/pull/1337) +* Leveraged `dpctl.tensor` implementation for `dpnp.moveaxis` function [#1382](https://github.com/IntelPython/dpnp/pull/1382) +* Leveraged `dpctl.tensor` implementation for `dpnp.squeeze` function [#1381](https://github.com/IntelPython/dpnp/pull/1381) +* Leveraged `dpctl.tensor` implementation for `dpnp.where` function [#1380](https://github.com/IntelPython/dpnp/pull/1380) +* Leveraged `dpctl.tensor` implementation for `dpnp.transpose` function [#1389](https://github.com/IntelPython/dpnp/pull/1389)
+* Leveraged `dpctl.tensor` implementation for `dpnp.reshape` function [#1391](https://github.com/IntelPython/dpnp/pull/1391) +* Leveraged `dpctl.tensor` implementation for `dpnp.add`, `dpnp.multiply` and `dpnp.subtract` functions [#1430](https://github.com/IntelPython/dpnp/pull/1430) +* Leveraged `dpctl.tensor` implementation for `dpnp.sum` function [#1426](https://github.com/IntelPython/dpnp/pull/1426) +* Leveraged `dpctl.tensor` implementation for `dpnp.result_type` function [#1435](https://github.com/IntelPython/dpnp/pull/1435) +* Reused OneDPL `std::nth_element` function in `dpnp.partition` with 1d array [#1406](https://github.com/IntelPython/dpnp/pull/1406) +* Transitioned dpnp build system to use scikit-build [#1349](https://github.com/IntelPython/dpnp/pull/1349) +* Renamed included dpnp_algo_*.pyx files to *.pxi [#1356](https://github.com/IntelPython/dpnp/pull/1356) +* Implemented support of key as a tuple in `dpnp.__getitem__()` and `dpnp.__setitem__()` functions [#1362](https://github.com/IntelPython/dpnp/pull/1362) +* Selected dpnp's own kernels for elementwise functions instead of OneMKL VM calls on a device without fp64 aspect [#1386](https://github.com/IntelPython/dpnp/pull/1386) +* Pinned to `sysroot>=2.28` and transitioned to `conda-forge` channel [#1408](https://github.com/IntelPython/dpnp/pull/1408) +* Redesigned `dpnp.divide` implementation to call `div` from OneMKL for C-contiguous data or to use `dpctl.tensor` library otherwise [#1418](https://github.com/IntelPython/dpnp/pull/1418) +* Changed the engine used for randomly generated arrays on GPU devices from MT19937 to MCG59 [#1423](https://github.com/IntelPython/dpnp/pull/1423) +* Implemented in-place support of `dpnp.divide` [#1434](https://github.com/IntelPython/dpnp/pull/1434) +* Redesigned `dpnp.outer` implementation through `dpnp.multiply` with broadcasted arrays [#1436](https://github.com/IntelPython/dpnp/pull/1436) +* Pinned to `dpctl>=0.14.3` as host and run dependencies [#1437](https://github.com/IntelPython/dpnp/pull/1437) +* Reimplemented `dpnp.cov` through an existing dpnp function instead of a separate kernel [#1396](https://github.com/IntelPython/dpnp/pull/1396) + + +### Fixed + +* Fixed `dpnp.asarray` function to accept a sequence of dpnp arrays [#1355](https://github.com/IntelPython/dpnp/pull/1355) +* Fixed crash in `dpnp.sum` with an empty array [#1369](https://github.com/IntelPython/dpnp/pull/1369) +* Fixed compilation error around `sycl::abs` with DPC++ 2023.2.0 [#1393](https://github.com/IntelPython/dpnp/pull/1393) +* Fixed Klocwork run and enabled CMake verbose mode for conda build [#1433](https://github.com/IntelPython/dpnp/pull/1433) diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index 9398b115175b..8b2e1927f7ff 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -18,7 +18,7 @@ set "DPL_ROOT_HINT=%PREFIX%/Library" %PYTHON% -m dpctl --cmakedir > Output set /p DPCTL_CMAKE_DIR= < Output -set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx" +set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" set "SKBUILD_ARGS=%SKBUILD_ARGS% -DDPCTL_MODULE_PATH:PATH=%DPCTL_CMAKE_DIR% " set "SKBUILD_ARGS=%SKBUILD_ARGS% -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index 8d832e5cb96c..09aa09734462 100644 --- a/conda-recipe/build.sh +++
b/conda-recipe/build.sh @@ -1,5 +1,11 @@ #!/bin/bash +# Workaround to Klocwork overwriting LD_LIBRARY_PATH that was modified +# by DPC++ compiler conda packages. Will need to be added to DPC++ compiler +# activation scripts. +export LDFLAGS="$LDFLAGS -Wl,-rpath,$PREFIX/lib" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PREFIX/compiler/lib/intel64_lin:$PREFIX/compiler/lib:$PREFIX/lib" + # Intel LLVM must cooperate with compiler and sysroot from conda echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg export ICPXCFG="$(pwd)/icpx_for_conda.cfg" @@ -10,7 +16,7 @@ export TBB_ROOT_HINT=$PREFIX export DPL_ROOT_HINT=$PREFIX export MKL_ROOT_HINT=$PREFIX SKBUILD_ARGS="-- -DDPCTL_MODULE_PATH=$($PYTHON -m dpctl --cmakedir) " -SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx" +SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" # Build wheel package diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 011226d0165e..a7f20f0f01da 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -13,7 +13,7 @@ requirements: - cmake >=3.21 - ninja - git - - dpctl >=0.14.2 + - dpctl >=0.14.3 - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.1.0') }} - onedpl-devel - tbb-devel @@ -25,7 +25,7 @@ requirements: - sysroot_linux-64 >=2.28 # [linux] run: - python - - dpctl >=0.14.2 + - dpctl >=0.14.3 - {{ pin_compatible('dpcpp-cpp-rt', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('mkl-dpcpp', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('numpy', min_pin='x.x', max_pin='x') }} diff --git a/doc/conf.py b/doc/conf.py index 14ad9efe447e..3f1469ea3ad9 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -31,9 +31,9 @@ author = 'Intel' # The short X.Y version -version = '0.11' +version = '0.12' # The full version, including alpha/beta/rc tags -release = '0.11.2dev1' +release = '0.12.0' # -- General configuration --------------------------------------------------- diff --git a/dpnp/__init__.py b/dpnp/__init__.py index fc3fc5ff33fc..a081a909923c 100644 --- a/dpnp/__init__.py +++ b/dpnp/__init__.py @@ -27,6 +27,9 @@ import os mypath = os.path.dirname(os.path.realpath(__file__)) +# workaround against hanging in OneMKL calls and in DPCTL +os.environ.setdefault('SYCL_QUEUE_THREAD_POOL_SIZE', '6') + import dpctl dpctlpath = os.path.dirname(dpctl.__file__) @@ -41,9 +44,6 @@ os.add_dll_directory(dpctlpath) os.environ["PATH"] = os.pathsep.join([os.getenv("PATH", ""), mypath, dpctlpath]) -# workaround against hanging in OneMKL calls -os.environ.setdefault('SYCL_QUEUE_THREAD_POOL_SIZE', '6') - from dpnp.dpnp_array import dpnp_array as ndarray from dpnp.dpnp_flatiter import flatiter as flatiter diff --git a/dpnp/backend/doc/Doxyfile b/dpnp/backend/doc/Doxyfile index 25dbd8972c48..40422a1a6773 100644 --- a/dpnp/backend/doc/Doxyfile +++ b/dpnp/backend/doc/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "DPNP C++ backend kernel library" # could be handy for archiving the generated documentation or if some version # control system is used. 
-PROJECT_NUMBER = 0.11.2dev1 +PROJECT_NUMBER = 0.12.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 3a0dc7d0a526..71d77abbdd16 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -124,7 +124,6 @@ enum class DPNPFuncName : size_t DPNP_FN_COUNT_NONZERO, /**< Used in numpy.count_nonzero() impl */ DPNP_FN_COUNT_NONZERO_EXT, /**< Used in numpy.count_nonzero() impl, requires extra parameters */ DPNP_FN_COV, /**< Used in numpy.cov() impl */ - DPNP_FN_COV_EXT, /**< Used in numpy.cov() impl, requires extra parameters */ DPNP_FN_CROSS, /**< Used in numpy.cross() impl */ DPNP_FN_CROSS_EXT, /**< Used in numpy.cross() impl, requires extra parameters */ DPNP_FN_CUMPROD, /**< Used in numpy.cumprod() impl */ @@ -218,7 +217,6 @@ enum class DPNPFuncName : size_t DPNP_FN_MAXIMUM, /**< Used in numpy.maximum() impl */ DPNP_FN_MAXIMUM_EXT, /**< Used in numpy.maximum() impl , requires extra parameters */ DPNP_FN_MEAN, /**< Used in numpy.mean() impl */ - DPNP_FN_MEAN_EXT, /**< Used in numpy.mean() impl, requires extra parameters */ DPNP_FN_MEDIAN, /**< Used in numpy.median() impl */ DPNP_FN_MEDIAN_EXT, /**< Used in numpy.median() impl, requires extra parameters */ DPNP_FN_MIN, /**< Used in numpy.min() impl */ @@ -360,7 +358,6 @@ enum class DPNPFuncName : size_t DPNP_FN_TANH, /**< Used in numpy.tanh() impl */ DPNP_FN_TANH_EXT, /**< Used in numpy.tanh() impl, requires extra parameters */ DPNP_FN_TRANSPOSE, /**< Used in numpy.transpose() impl */ - DPNP_FN_TRANSPOSE_EXT, /**< Used in numpy.transpose() impl, requires extra parameters */ DPNP_FN_TRACE, /**< Used in numpy.trace() impl */ DPNP_FN_TRACE_EXT, /**< Used in numpy.trace() impl, requires extra parameters */ DPNP_FN_TRAPZ, /**< Used in numpy.trapz() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp b/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp index 8a122dbf7283..2fc7832b6bab 100644 --- a/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -211,6 +211,7 @@ void dpnp_elemwise_transpose_c(void* array1_in, size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -222,17 +223,6 @@ void (*dpnp_elemwise_transpose_default_c)(void*, void*, size_t) = dpnp_elemwise_transpose_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_elemwise_transpose_ext_c)(DPCTLSyclQueueRef, - void*, - const shape_elem_type*, - const shape_elem_type*, - const shape_elem_type*, - size_t, - void*, - size_t, - const DPCTLEventVectorRef) = dpnp_elemwise_transpose_c<_DataType>; - void func_map_init_manipulation(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_REPEAT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_repeat_default_c}; @@ -253,15 +243,5 @@ void func_map_init_manipulation(func_map_t& fmap) (void*)dpnp_elemwise_transpose_default_c}; fmap[DPNPFuncName::DPNP_FN_TRANSPOSE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_elemwise_transpose_default_c}; - - fmap[DPNPFuncName::DPNP_FN_TRANSPOSE_EXT][eft_INT][eft_INT] = {eft_INT, - (void*)dpnp_elemwise_transpose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRANSPOSE_EXT][eft_LNG][eft_LNG] = {eft_LNG, - (void*)dpnp_elemwise_transpose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRANSPOSE_EXT][eft_FLT][eft_FLT] = {eft_FLT, - (void*)dpnp_elemwise_transpose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRANSPOSE_EXT][eft_DBL][eft_DBL] = {eft_DBL, - (void*)dpnp_elemwise_transpose_ext_c}; - return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 568db448d966..7a79b235a4ef 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -77,24 +77,78 @@ void dpnp_rng_srand_c(size_t seed) } template -static inline DPCTLSyclEventRef dpnp_rng_generate(const _DistrType& distr, - _EngineType& engine, - const int64_t size, - _DataType* result) { +static inline DPCTLSyclEventRef + dpnp_rng_generate(const _DistrType& distr, _EngineType& engine, const int64_t size, _DataType* result) +{ DPCTLSyclEventRef event_ref = nullptr; sycl::event event; // perform rng generation - try { + try + { event = mkl_rng::generate<_DistrType, _EngineType>(distr, engine, size, result); event_ref = reinterpret_cast(&event); - } catch (const std::exception &e) { + } + catch (const std::exception& e) + { // TODO: add error reporting return event_ref; } return DPCTLEvent_Copy(event_ref); } +template +static inline DPCTLSyclEventRef dpnp_rng_generate_uniform( + _EngineType& engine, sycl::queue* q, const _DataType a, const _DataType b, const int64_t size, _DataType* result) +{ + DPCTLSyclEventRef event_ref = nullptr; + + if constexpr (std::is_same<_DataType, int32_t>::value) + { + if (q->get_device().has(sycl::aspect::fp64)) + { + /** + * A note from oneMKL for oneapi::mkl::rng::uniform (Discrete): + * The oneapi::mkl::rng::uniform_method::standard uses the s BRNG type on GPU devices. + * This might cause the produced numbers to have incorrect statistics (due to rounding error) + * when abs(b-a) > 2^23 || abs(b) > 2^23 || abs(a) > 2^23. To get proper statistics for this case, + * use the oneapi::mkl::rng::uniform_method::accurate method instead. 
+ */ + using method_type = mkl_rng::uniform_method::accurate; + mkl_rng::uniform<_DataType, method_type> distribution(a, b); + + // perform generation + try + { + sycl::event event = mkl_rng::generate(distribution, engine, size, result); + + event_ref = reinterpret_cast(&event); + return DPCTLEvent_Copy(event_ref); + } + catch (const oneapi::mkl::unsupported_device&) + { + // fall through to try with uniform_method::standard + } + catch (const oneapi::mkl::unimplemented&) + { + // fall through to try with uniform_method::standard + } + catch (const std::exception& e) + { + // TODO: add error reporting + return event_ref; + } + } + } + + // uniform_method::standard is a method used by default + using method_type = mkl_rng::uniform_method::standard; + mkl_rng::uniform<_DataType, method_type> distribution(a, b); + + // perform generation + return dpnp_rng_generate(distribution, engine, size, result); +} + template DPCTLSyclEventRef dpnp_rng_beta_c(DPCTLSyclQueueRef q_ref, void* result, @@ -1392,17 +1446,17 @@ DPCTLSyclEventRef dpnp_rng_normal_c(DPCTLSyclQueueRef q_ref, { // avoid warning unused variable (void)dep_event_vec_ref; - (void)q_ref; DPCTLSyclEventRef event_ref = nullptr; + sycl::queue* q = reinterpret_cast(q_ref); if (!size) { return event_ref; } + assert(q != nullptr); - mt19937_struct* random_state = static_cast(random_state_in); - _DataType* result = static_cast<_DataType *>(result_out); + _DataType* result = static_cast<_DataType*>(result_out); // set mean of distribution const _DataType mean = static_cast<_DataType>(mean_in); @@ -1410,31 +1464,57 @@ DPCTLSyclEventRef dpnp_rng_normal_c(DPCTLSyclQueueRef q_ref, const _DataType stddev = static_cast<_DataType>(stddev_in); mkl_rng::gaussian<_DataType> distribution(mean, stddev); - mkl_rng::mt19937 *engine = static_cast(random_state->engine); - // perform generation - return dpnp_rng_generate, mkl_rng::mt19937, _DataType>( - distribution, *engine, size, result); + if (q->get_device().is_cpu()) + { + mt19937_struct* random_state = static_cast(random_state_in); + mkl_rng::mt19937* engine = static_cast(random_state->engine); + + // perform generation with MT19937 engine + event_ref = dpnp_rng_generate(distribution, *engine, size, result); + } + else + { + mcg59_struct* random_state = static_cast(random_state_in); + mkl_rng::mcg59* engine = static_cast(random_state->engine); + + // perform generation with MCG59 engine + event_ref = dpnp_rng_generate(distribution, *engine, size, result); + } + return event_ref; } template void dpnp_rng_normal_c(void* result, const _DataType mean, const _DataType stddev, const size_t size) { - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); + sycl::queue* q = &DPNP_QUEUE; + DPCTLSyclQueueRef q_ref = reinterpret_cast(q); DPCTLEventVectorRef dep_event_vec_ref = nullptr; - mt19937_struct* mt19937 = new mt19937_struct(); - mt19937->engine = &DPNP_RNG_ENGINE; + DPCTLSyclEventRef event_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_rng_normal_c<_DataType>(q_ref, - result, - mean, - stddev, - static_cast(size), - mt19937, - dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); - delete mt19937; + if (q->get_device().is_cpu()) + { + mt19937_struct* mt19937 = new mt19937_struct(); + mt19937->engine = &DPNP_RNG_ENGINE; + + event_ref = dpnp_rng_normal_c<_DataType>( + q_ref, result, mean, stddev, static_cast(size), mt19937, dep_event_vec_ref); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); + delete mt19937; + } + else + { + // MCG59 engine is 
assumed to provide a better performance on GPU than MT19937 + mcg59_struct* mcg59 = new mcg59_struct(); + mcg59->engine = &DPNP_RNG_MCG59_ENGINE; + + event_ref = dpnp_rng_normal_c<_DataType>( + q_ref, result, mean, stddev, static_cast(size), mcg59, dep_event_vec_ref); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); + delete mcg59; + } } template @@ -2149,74 +2229,75 @@ DPCTLSyclEventRef dpnp_rng_uniform_c(DPCTLSyclQueueRef q_ref, return event_ref; } - sycl::queue *q = reinterpret_cast(q_ref); + sycl::queue* q = reinterpret_cast(q_ref); - mt19937_struct* random_state = static_cast(random_state_in); - _DataType* result = static_cast<_DataType *>(result_out); + _DataType* result = static_cast<_DataType*>(result_out); // set left bound of distribution const _DataType a = static_cast<_DataType>(low); // set right bound of distribution const _DataType b = static_cast<_DataType>(high); - mkl_rng::mt19937 *engine = static_cast(random_state->engine); - - if constexpr (std::is_same<_DataType, int32_t>::value) { - if (q->get_device().has(sycl::aspect::fp64)) { - /** - * A note from oneMKL for oneapi::mkl::rng::uniform (Discrete): - * The oneapi::mkl::rng::uniform_method::standard uses the s BRNG type on GPU devices. - * This might cause the produced numbers to have incorrect statistics (due to rounding error) - * when abs(b-a) > 2^23 || abs(b) > 2^23 || abs(a) > 2^23. To get proper statistics for this case, - * use the oneapi::mkl::rng::uniform_method::accurate method instead. - */ - using method_type = mkl_rng::uniform_method::accurate; - mkl_rng::uniform<_DataType, method_type> distribution(a, b); + if (q->get_device().is_cpu()) + { + mt19937_struct* random_state = static_cast(random_state_in); + mkl_rng::mt19937* engine = static_cast(random_state->engine); - // perform generation - try { - auto event = mkl_rng::generate, mkl_rng::mt19937>( - distribution, *engine, size, result); - event_ref = reinterpret_cast(&event); - return DPCTLEvent_Copy(event_ref); - } catch (const oneapi::mkl::unsupported_device&) { - // fall through to try with uniform_method::standard - } catch (const oneapi::mkl::unimplemented&) { - // fall through to try with uniform_method::standard - } catch (const std::exception &e) { - // TODO: add error reporting - return event_ref; - } - } + // perform generation with MT19937 engine + event_ref = dpnp_rng_generate_uniform(*engine, q, a, b, size, result); } + else + { + mcg59_struct* random_state = static_cast(random_state_in); + mkl_rng::mcg59* engine = static_cast(random_state->engine); - // uniform_method::standard is a method used by default - using method_type = mkl_rng::uniform_method::standard; - mkl_rng::uniform<_DataType, method_type> distribution(a, b); - - // perform generation - return dpnp_rng_generate, mkl_rng::mt19937, _DataType>( - distribution, *engine, size, result); + // perform generation with MCG59 engine + event_ref = dpnp_rng_generate_uniform(*engine, q, a, b, size, result); + } + return event_ref; } template void dpnp_rng_uniform_c(void* result, const long low, const long high, const size_t size) { - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); + sycl::queue* q = &DPNP_QUEUE; + DPCTLSyclQueueRef q_ref = reinterpret_cast(q); DPCTLEventVectorRef dep_event_vec_ref = nullptr; - mt19937_struct* mt19937 = new mt19937_struct(); - mt19937->engine = &DPNP_RNG_ENGINE; + DPCTLSyclEventRef event_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_rng_uniform_c<_DataType>(q_ref, - result, - static_cast(low), - static_cast(high), - 
static_cast(size), - mt19937, - dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); - delete mt19937; + if (q->get_device().is_cpu()) + { + mt19937_struct* mt19937 = new mt19937_struct(); + mt19937->engine = &DPNP_RNG_ENGINE; + + event_ref = dpnp_rng_uniform_c<_DataType>(q_ref, + result, + static_cast(low), + static_cast(high), + static_cast(size), + mt19937, + dep_event_vec_ref); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); + delete mt19937; + } + else + { + // MCG59 engine is assumed to provide a better performance on GPU than MT19937 + mcg59_struct* mcg59 = new mcg59_struct(); + mcg59->engine = &DPNP_RNG_MCG59_ENGINE; + + event_ref = dpnp_rng_uniform_c<_DataType>(q_ref, + result, + static_cast(low), + static_cast(high), + static_cast(size), + mcg59, + dep_event_vec_ref); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); + delete mcg59; + } } template diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp index eaaf6b72f89f..5dc3dc95373e 100644 --- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp @@ -243,14 +243,6 @@ void dpnp_cov_c(void* array1_in, void* result1, size_t nrows, size_t ncols) template void (*dpnp_cov_default_c)(void*, void*, size_t, size_t) = dpnp_cov_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_cov_ext_c)(DPCTLSyclQueueRef, - void*, - void*, - size_t, - size_t, - const DPCTLEventVectorRef) = dpnp_cov_c<_DataType>; - template DPCTLSyclEventRef dpnp_count_nonzero_c(DPCTLSyclQueueRef q_ref, void* array1_in, @@ -650,15 +642,6 @@ void (*dpnp_mean_default_c)(void*, const shape_elem_type*, size_t) = dpnp_mean_c<_DataType, _ResultType>; -template -DPCTLSyclEventRef (*dpnp_mean_ext_c)(DPCTLSyclQueueRef, - void*, - void*, - const shape_elem_type*, - const size_t, - const shape_elem_type*, - const size_t, - const DPCTLEventVectorRef) = dpnp_mean_c<_DataType, _ResultType>; template DPCTLSyclEventRef dpnp_median_c(DPCTLSyclQueueRef q_ref, @@ -1382,11 +1365,6 @@ void func_map_init_statistics(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_COV][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_cov_default_c}; fmap[DPNPFuncName::DPNP_FN_COV][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_default_c}; - fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_cov_ext_c}; - fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_cov_ext_c}; - fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cov_ext_c}; - fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_ext_c}; - fmap[DPNPFuncName::DPNP_FN_MAX][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_max_default_c}; fmap[DPNPFuncName::DPNP_FN_MAX][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_max_default_c}; fmap[DPNPFuncName::DPNP_FN_MAX][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_max_default_c}; @@ -1402,11 +1380,6 @@ void func_map_init_statistics(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_MEAN][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_mean_default_c}; fmap[DPNPFuncName::DPNP_FN_MEAN][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_mean_default_c}; - fmap[DPNPFuncName::DPNP_FN_MEAN_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_mean_ext_c}; - fmap[DPNPFuncName::DPNP_FN_MEAN_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_mean_ext_c}; - fmap[DPNPFuncName::DPNP_FN_MEAN_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_mean_ext_c}; - fmap[DPNPFuncName::DPNP_FN_MEAN_EXT][eft_DBL][eft_DBL] = 
{eft_DBL, (void*)dpnp_mean_ext_c}; - - fmap[DPNPFuncName::DPNP_FN_MEDIAN][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_median_default_c}; fmap[DPNPFuncName::DPNP_FN_MEDIAN][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_median_default_c}; fmap[DPNPFuncName::DPNP_FN_MEDIAN][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_median_default_c}; diff --git a/dpnp/backend/src/dpnp_random_state.cpp b/dpnp/backend/src/dpnp_random_state.cpp index ddae4eba2443..12db01a1cd81 100644 --- a/dpnp/backend/src/dpnp_random_state.cpp +++ b/dpnp/backend/src/dpnp_random_state.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2022, Intel Corporation +// Copyright (c) 2022-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -52,3 +52,16 @@ void MT19937_Delete(mt19937_struct *mt19937) { mt19937->engine = nullptr; delete engine; } + +void MCG59_InitScalarSeed(mcg59_struct* mcg59, DPCTLSyclQueueRef q_ref, uint64_t seed) +{ + sycl::queue* q = reinterpret_cast(q_ref); + mcg59->engine = new mkl_rng::mcg59(*q, seed); +} + +void MCG59_Delete(mcg59_struct* mcg59) +{ + mkl_rng::mcg59* engine = static_cast(mcg59->engine); + mcg59->engine = nullptr; + delete engine; +} diff --git a/dpnp/backend/src/dpnp_random_state.hpp b/dpnp/backend/src/dpnp_random_state.hpp index 0e37c3c99b51..7392ca3f09ac 100644 --- a/dpnp/backend/src/dpnp_random_state.hpp +++ b/dpnp/backend/src/dpnp_random_state.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2022, Intel Corporation +// Copyright (c) 2022-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -47,12 +47,22 @@ #include -// Structure storing MKL engine for MT199374x32x10 algorithm -struct mt19937_struct +// Structure storing a base MKL engine +struct engine_struct { void* engine; }; +// Structure storing MKL engine for MT199374x32x10 generator +struct mt19937_struct : engine_struct +{ +}; + +// Structure storing MKL engine for MCG59 generator +struct mcg59_struct : engine_struct +{ +}; + /** * @ingroup BACKEND_API * @brief Create a MKL engine from scalar seed. * @@ -88,4 +98,26 @@ INP_DLLEXPORT void MT19937_InitVectorSeed(mt19937_struct *mt19937, DPCTLSyclQueu */ INP_DLLEXPORT void MT19937_Delete(mt19937_struct *mt19937); +/** + * @ingroup BACKEND_API + * @brief Create a MKL engine from scalar seed. + * + * Invoke a common seed initialization of the engine for MCG59 algorithm. + * + * @param [in] mcg59 A structure with MKL engine which will be filled with generated value by MKL. + * @param [in] q_ref A reference on SYCL queue which will be used to obtain random numbers. + * @param [in] seed An initial condition of the generator state. + */ +INP_DLLEXPORT void MCG59_InitScalarSeed(mcg59_struct* mcg59, DPCTLSyclQueueRef q_ref, uint64_t seed); + +/** + * @ingroup BACKEND_API + * @brief Release a MKL engine. + * + * Release all resources required for storing the MKL engine. + * + * @param [in] mcg59 A structure with the MKL engine.
+ */ +INP_DLLEXPORT void MCG59_Delete(mcg59_struct* mcg59); + #endif // BACKEND_RANDOM_STATE_H diff --git a/dpnp/backend/src/queue_sycl.cpp b/dpnp/backend/src/queue_sycl.cpp index 55f78230d64e..262ed9833b76 100644 --- a/dpnp/backend/src/queue_sycl.cpp +++ b/dpnp/backend/src/queue_sycl.cpp @@ -35,6 +35,7 @@ sycl::queue* backend_sycl::queue = nullptr; #endif mkl_rng::mt19937* backend_sycl::rng_engine = nullptr; +mkl_rng::mcg59* backend_sycl::rng_mcg59_engine = nullptr; static void dpnpc_show_mathlib_version() { @@ -226,6 +227,7 @@ void backend_sycl::backend_sycl_rng_engine_init(size_t seed) backend_sycl::destroy_rng_engine(); } rng_engine = new mkl_rng::mt19937(DPNP_QUEUE, seed); + rng_mcg59_engine = new mkl_rng::mcg59(DPNP_QUEUE, seed); } void dpnp_queue_initialize_c(QueueOptions selector) diff --git a/dpnp/backend/src/queue_sycl.hpp b/dpnp/backend/src/queue_sycl.hpp index 8683fdd5737d..faf8fa932f8e 100644 --- a/dpnp/backend/src/queue_sycl.hpp +++ b/dpnp/backend/src/queue_sycl.hpp @@ -56,8 +56,9 @@ namespace mkl_rng = oneapi::mkl::rng; -#define DPNP_QUEUE backend_sycl::get_queue() -#define DPNP_RNG_ENGINE backend_sycl::get_rng_engine() +#define DPNP_QUEUE backend_sycl::get_queue() +#define DPNP_RNG_ENGINE backend_sycl::get_rng_engine() +#define DPNP_RNG_MCG59_ENGINE backend_sycl::get_rng_mcg59_engine() /** * This is container for the SYCL queue, random number generation engine and related functions like queue and engine @@ -70,7 +71,10 @@ class backend_sycl #if defined(DPNP_LOCAL_QUEUE) static sycl::queue* queue; /**< contains SYCL queue pointer initialized in @ref backend_sycl_queue_init */ #endif - static mkl_rng::mt19937* rng_engine; /**< RNG engine ptr. initialized in @ref backend_sycl_rng_engine_init */ + static mkl_rng::mt19937* + rng_engine; /**< RNG MT19937 engine ptr. initialized in @ref backend_sycl_rng_engine_init */ + static mkl_rng::mcg59* + rng_mcg59_engine; /**< RNG MCG59 engine ptr. 
initialized in @ref backend_sycl_rng_engine_init */ static void destroy() { @@ -84,7 +88,10 @@ class backend_sycl static void destroy_rng_engine() { delete rng_engine; + delete rng_mcg59_engine; + rng_engine = nullptr; + rng_mcg59_engine = nullptr; } public: @@ -118,7 +125,7 @@ class backend_sycl static bool backend_sycl_is_cpu(); /** - * Initialize @ref rng_engine + * Initialize @ref rng_engine and @ref rng_mcg59_engine */ static void backend_sycl_rng_engine_init(size_t seed = 1); @@ -159,6 +166,18 @@ class backend_sycl } return *rng_engine; } + + /** + * Return the @ref rng_mcg59_engine to the user + */ + static mkl_rng::mcg59& get_rng_mcg59_engine() + { + if (!rng_engine) + { + backend_sycl_rng_engine_init(); + } + return *rng_mcg59_engine; + } }; #endif // QUEUE_SYCL_H diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 0120aa6b453a..4bea0171b37c 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -36,8 +36,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na cdef enum DPNPFuncName "DPNPFuncName": DPNP_FN_ABSOLUTE DPNP_FN_ABSOLUTE_EXT - DPNP_FN_ADD - DPNP_FN_ADD_EXT DPNP_FN_ALL DPNP_FN_ALL_EXT DPNP_FN_ALLCLOSE @@ -97,8 +95,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_COS_EXT DPNP_FN_COSH DPNP_FN_COSH_EXT - DPNP_FN_COV - DPNP_FN_COV_EXT DPNP_FN_COUNT_NONZERO DPNP_FN_COUNT_NONZERO_EXT DPNP_FN_CROSS @@ -117,7 +113,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_DIAG_INDICES_EXT DPNP_FN_DIAGONAL DPNP_FN_DIAGONAL_EXT - DPNP_FN_DIVIDE DPNP_FN_DOT DPNP_FN_DOT_EXT DPNP_FN_EDIFF1D @@ -193,8 +188,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_MAX_EXT DPNP_FN_MAXIMUM DPNP_FN_MAXIMUM_EXT - DPNP_FN_MEAN - DPNP_FN_MEAN_EXT DPNP_FN_MEDIAN DPNP_FN_MEDIAN_EXT DPNP_FN_MIN @@ -203,8 +196,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_MINIMUM_EXT DPNP_FN_MODF DPNP_FN_MODF_EXT - DPNP_FN_MULTIPLY - DPNP_FN_MULTIPLY_EXT DPNP_FN_NANVAR DPNP_FN_NANVAR_EXT DPNP_FN_NEGATIVE @@ -323,8 +314,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_SQUARE_EXT DPNP_FN_STD DPNP_FN_STD_EXT - DPNP_FN_SUBTRACT - DPNP_FN_SUBTRACT_EXT DPNP_FN_SUM DPNP_FN_SUM_EXT DPNP_FN_SVD @@ -338,7 +327,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_TRACE DPNP_FN_TRACE_EXT DPNP_FN_TRANSPOSE - DPNP_FN_TRANSPOSE_EXT DPNP_FN_TRAPZ DPNP_FN_TRAPZ_EXT DPNP_FN_TRI @@ -371,8 +359,6 @@ cdef extern from "dpnp_iface_fptr.hpp": struct DPNPFuncData: DPNPFuncType return_type void * ptr - DPNPFuncType return_type_no_fp64 - void *ptr_no_fp64 DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except + @@ -526,8 +512,6 @@ cpdef dpnp_descriptor dpnp_copy(dpnp_descriptor x1) """ Mathematical functions """ -cpdef dpnp_descriptor dpnp_add(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, - dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_arctan2(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_hypot(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, @@ -536,29 +520,22 @@ cpdef dpnp_descriptor dpnp_maximum(dpnp_descriptor x1_obj, dpnp_descriptor x2_ob dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor 
dpnp_minimum(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, dpnp_descriptor out=*, object where=*) -cpdef dpnp_descriptor dpnp_multiply(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, - dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_negative(dpnp_descriptor array1) cpdef dpnp_descriptor dpnp_power(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_remainder(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, dpnp_descriptor out=*, object where=*) -cpdef dpnp_descriptor dpnp_subtract(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, - dpnp_descriptor out=*, object where=*) """ Array manipulation routines """ cpdef dpnp_descriptor dpnp_repeat(dpnp_descriptor array1, repeats, axes=*) -cpdef dpnp_descriptor dpnp_transpose(dpnp_descriptor array1, axes=*) """ Statistics functions """ -cpdef dpnp_descriptor dpnp_cov(dpnp_descriptor array1) -cpdef object dpnp_mean(dpnp_descriptor a, axis) cpdef dpnp_descriptor dpnp_min(dpnp_descriptor a, axis) diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 8b6d4be73e94..2468a8fa4c0c 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -497,14 +497,8 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj) # get FPTR function and return type - cdef fptr_2in_1out_strides_t func = NULL - cdef DPNPFuncType return_type = DPNP_FT_NONE - if result_sycl_device.has_aspect_fp64: - return_type = kernel_data.return_type - func = < fptr_2in_1out_strides_t > kernel_data.ptr - else: - return_type = kernel_data.return_type_no_fp64 - func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64 + cdef fptr_2in_1out_strides_t func = < fptr_2in_1out_strides_t > kernel_data.ptr + cdef DPNPFuncType return_type = kernel_data.return_type # check 'out' parameter data if out is not None: diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi index f9eac4ffd35b..407d3466c9b8 100644 --- a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi @@ -40,7 +40,6 @@ __all__ += [ "dpnp_inner", "dpnp_kron", "dpnp_matmul", - "dpnp_outer" ] @@ -378,27 +377,3 @@ cpdef utils.dpnp_descriptor dpnp_matmul(utils.dpnp_descriptor in_array1, utils.d c_dpctl.DPCTLEvent_Delete(event_ref) return result - - -cpdef utils.dpnp_descriptor dpnp_outer(utils.dpnp_descriptor array1, utils.dpnp_descriptor array2): - cdef shape_type_c result_shape = (array1.size, array2.size) - result_type = numpy.promote_types(array1.dtype, array1.dtype) - - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(array1, array2) - - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(result_shape, - result_type, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - - result_flatiter = result.get_pyobj().flat - array1_flatiter = array1.get_pyobj().flat - array2_flatiter = array2.get_pyobj().flat - - for idx1 in range(array1.size): - for idx2 in range(array2.size): - result_flatiter[idx1 * array2.size + idx2] = array1_flatiter[idx1] * array2_flatiter[idx2] - - return result diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi b/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi index b9234dbe5ab2..94a58b057bff 100644 --- 
a/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi @@ -42,20 +42,10 @@ __all__ += [ "dpnp_expand_dims", "dpnp_repeat", "dpnp_reshape", - "dpnp_transpose", ] # C function pointer to the C library template functions -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_custom_elemwise_transpose_1in_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - shape_elem_type * , - shape_elem_type * , - shape_elem_type * , - size_t, - void * , - size_t, - const c_dpctl.DPCTLEventVectorRef) ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_repeat_t)(c_dpctl.DPCTLSyclQueueRef, const void *, void * , const size_t , const size_t, const c_dpctl.DPCTLEventVectorRef) @@ -229,67 +219,3 @@ cpdef utils.dpnp_descriptor dpnp_reshape(utils.dpnp_descriptor array1, newshape, usm_type=array1_obj.usm_type, sycl_queue=array1_obj.sycl_queue), copy_when_nondefault_queue=False) - - -cpdef utils.dpnp_descriptor dpnp_transpose(utils.dpnp_descriptor array1, axes=None): - cdef shape_type_c input_shape = array1.shape - cdef size_t input_shape_size = array1.ndim - cdef shape_type_c result_shape = shape_type_c(input_shape_size, 1) - - cdef shape_type_c permute_axes - if axes is None: - """ - template to do transpose a tensor - input_shape=[2, 3, 4] - permute_axes=[2, 1, 0] - after application `permute_axes` to `input_shape` result: - result_shape=[4, 3, 2] - - 'do nothing' axes variable is `permute_axes=[0, 1, 2]` - - test: pytest tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_external_transpose_all - """ - permute_axes = list(reversed([i for i in range(input_shape_size)])) - else: - permute_axes = utils.normalize_axis(axes, input_shape_size) - - for i in range(input_shape_size): - """ construct output shape """ - result_shape[i] = input_shape[permute_axes[i]] - - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) - - # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRANSPOSE_EXT, param1_type, param1_type) - - array1_obj = array1.get_array() - - # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=array1_obj.sycl_device, - usm_type=array1_obj.usm_type, - sycl_queue=array1_obj.sycl_queue) - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_custom_elemwise_transpose_1in_1out_t func = kernel_data.ptr - # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - array1.get_data(), - input_shape.data(), - result_shape.data(), - permute_axes.data(), - input_shape_size, - result.get_data(), - array1.size, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi index 3a002fdd4ba7..4860feb72269 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi @@ -37,7 +37,6 @@ and the rest of the library __all__ += [ "dpnp_absolute", - "dpnp_add", "dpnp_arctan2", "dpnp_around", "dpnp_ceil", @@ -57,7 +56,6 @@ __all__ += [ "dpnp_maximum", "dpnp_minimum", "dpnp_modf", - "dpnp_multiply", "dpnp_nancumprod", "dpnp_nancumsum", "dpnp_nanprod", @@ -67,7 +65,6 @@ __all__ += [ 
"dpnp_prod", "dpnp_remainder", "dpnp_sign", - "dpnp_subtract", "dpnp_sum", "dpnp_trapz", "dpnp_trunc" @@ -123,14 +120,6 @@ cpdef utils.dpnp_descriptor dpnp_absolute(utils.dpnp_descriptor x1): return result -cpdef utils.dpnp_descriptor dpnp_add(utils.dpnp_descriptor x1_obj, - utils.dpnp_descriptor x2_obj, - object dtype=None, - utils.dpnp_descriptor out=None, - object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_ADD_EXT, x1_obj, x2_obj, dtype, out, where) - - cpdef utils.dpnp_descriptor dpnp_arctan2(utils.dpnp_descriptor x1_obj, utils.dpnp_descriptor x2_obj, object dtype=None, @@ -426,14 +415,6 @@ cpdef tuple dpnp_modf(utils.dpnp_descriptor x1): return (result1.get_pyobj(), result2.get_pyobj()) -cpdef utils.dpnp_descriptor dpnp_multiply(utils.dpnp_descriptor x1_obj, - utils.dpnp_descriptor x2_obj, - object dtype=None, - utils.dpnp_descriptor out=None, - object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_MULTIPLY_EXT, x1_obj, x2_obj, dtype, out, where) - - cpdef utils.dpnp_descriptor dpnp_nancumprod(utils.dpnp_descriptor x1): cur_x1 = dpnp_copy(x1).get_pyobj() @@ -586,14 +567,6 @@ cpdef utils.dpnp_descriptor dpnp_sign(utils.dpnp_descriptor x1): return call_fptr_1in_1out_strides(DPNP_FN_SIGN_EXT, x1) -cpdef utils.dpnp_descriptor dpnp_subtract(utils.dpnp_descriptor x1_obj, - utils.dpnp_descriptor x2_obj, - object dtype=None, - utils.dpnp_descriptor out=None, - object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_SUBTRACT_EXT, x1_obj, x2_obj, dtype, out, where) - - cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor x1, object axis=None, object dtype=None, diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi index d2868a8ee042..8baa93651ab3 100644 --- a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi @@ -38,9 +38,7 @@ and the rest of the library __all__ += [ "dpnp_average", "dpnp_correlate", - "dpnp_cov", "dpnp_max", - "dpnp_mean", "dpnp_median", "dpnp_min", "dpnp_nanvar", @@ -179,49 +177,6 @@ cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_ return result -cpdef utils.dpnp_descriptor dpnp_cov(utils.dpnp_descriptor array1): - cdef shape_type_c input_shape = array1.shape - - if array1.ndim == 1: - input_shape.insert(input_shape.begin(), 1) - - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) - - # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COV_EXT, param1_type, param1_type) - - array1_obj = array1.get_array() - - # ceate result array with type given by FPTR data - cdef shape_type_c result_shape = (input_shape[0], input_shape[0]) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=array1_obj.sycl_device, - usm_type=array1_obj.usm_type, - sycl_queue=array1_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_custom_cov_1in_1out_t func = kernel_data.ptr - # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - array1.get_data(), - result.get_data(), - input_shape[0], - input_shape[1], - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cdef utils.dpnp_descriptor 
_dpnp_max(utils.dpnp_descriptor x1, _axis_, shape_type_c result_shape): cdef shape_type_c x1_shape = x1.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) @@ -302,152 +257,6 @@ cpdef utils.dpnp_descriptor dpnp_max(utils.dpnp_descriptor x1, axis): return _dpnp_max(x1, axis_, output_shape) - -cpdef utils.dpnp_descriptor _dpnp_mean(utils.dpnp_descriptor x1): - cdef shape_type_c x1_shape = x1.shape - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MEAN_EXT, param1_type, param1_type) - - x1_obj = x1.get_array() - - cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), - kernel_data.return_type, - None, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_statistic_1in_1out_func_ptr_t func = kernel_data.ptr - - # stub for interface support - cdef shape_type_c axis - cdef Py_ssize_t axis_size = 0 - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - x1.get_data(), - result.get_data(), - x1_shape.data(), - x1.ndim, - axis.data(), - axis_size, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - -cpdef object dpnp_mean(utils.dpnp_descriptor x1, axis): - cdef shape_type_c output_shape - - if axis is None: - return _dpnp_mean(x1).get_pyobj() - - cdef long x1_size = x1.size - cdef shape_type_c x1_shape = x1.shape - - if x1.dtype == dpnp.float32: - res_type = dpnp.float32 - else: - res_type = dpnp.float64 - - if x1_size == 0: - return dpnp.array([dpnp.nan], dtype=res_type) - - if isinstance(axis, int): - axis_ = tuple([axis]) - else: - axis_ = axis - - if axis_ is None: - output_shape.push_back(1) - else: - output_shape = (0, ) * (len(x1_shape) - len(axis_)) - ind = 0 - for id, shape_axis in enumerate(x1_shape): - if id not in axis_: - output_shape[ind] = shape_axis - ind += 1 - - cdef long prod = 1 - for i in range(len(output_shape)): - if output_shape[i] != 0: - prod *= output_shape[i] - - result_array = [None] * prod - input_shape_offsets = [None] * len(x1_shape) - acc = 1 - - for i in range(len(x1_shape)): - ind = len(x1_shape) - 1 - i - input_shape_offsets[ind] = acc - acc *= x1_shape[ind] - - output_shape_offsets = [None] * len(x1_shape) - acc = 1 - - if axis_ is not None: - for i in range(len(output_shape)): - ind = len(output_shape) - 1 - i - output_shape_offsets[ind] = acc - acc *= output_shape[ind] - result_offsets = input_shape_offsets[:] # need copy. 
not a reference - for i in axis_: - result_offsets[i] = 0 - - for source_idx in range(x1_size): - - # reconstruct x,y,z from linear source_idx - xyz = [] - remainder = source_idx - for i in input_shape_offsets: - quotient, remainder = divmod(remainder, i) - xyz.append(quotient) - - # extract result axis - result_axis = [] - if axis_ is None: - result_axis = xyz - else: - for idx, offset in enumerate(xyz): - if idx not in axis_: - result_axis.append(offset) - - # Construct result offset - result_offset = 0 - if axis_ is not None: - for i, result_axis_val in enumerate(result_axis): - result_offset += (output_shape_offsets[i] * result_axis_val) - - input_elem = input.get_pyobj().item(source_idx) - if axis_ is None: - if result_array[0] is None: - result_array[0] = input_elem - else: - result_array[0] += input_elem - else: - if result_array[result_offset] is None: - result_array[result_offset] = input_elem - else: - result_array[result_offset] += input_elem - - del_ = x1_size - if axis_ is not None: - for i in range(len(x1_shape)): - if i not in axis_: - del_ = del_ / x1_shape[i] - dpnp_array = dpnp.array(result_array, dtype=input.dtype) - dpnp_result_array = dpnp.reshape(dpnp_array, output_shape) - return dpnp_result_array / del_ - - cpdef utils.dpnp_descriptor dpnp_median(utils.dpnp_descriptor array1): cdef shape_type_c x1_shape = array1.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index 9c2383e57a0b..7a54a1947955 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -35,13 +35,62 @@ BinaryElementwiseFunc ) import dpctl.tensor._tensor_impl as ti +import dpctl.tensor as dpt +import dpctl + +import numpy __all__ = [ - "dpnp_divide" + "dpnp_add", + "dpnp_divide", + "dpnp_multiply", + "dpnp_subtract" ] +_add_docstring_ = """ +add(x1, x2, out=None, order='K') + +Calculates the sum for each element `x1_i` of the input array `x1` with +the respective element `x2_i` of the input array `x2`. + +Args: + x1 (dpnp.ndarray): + First input array, expected to have numeric data type. + x2 (dpnp.ndarray): + Second input array, also expected to have numeric data type. + out ({None, dpnp.ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", None, optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + dpnp.ndarray: + an array containing the result of element-wise addition. The data type + of the returned array is determined by the Type Promotion Rules. +""" + +def dpnp_add(x1, x2, out=None, order='K'): + """ + Invokes add() from dpctl.tensor implementation for add() function. + TODO: add a pybind11 extension of add() from OneMKL VM where possible + and performance effective.
+ + """ + + # dpctl.tensor only works with usm_ndarray or scalar + x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1) + x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2) + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + + func = BinaryElementwiseFunc("add", ti._add_result_type, ti._add, + _add_docstring_, ti._add_inplace) + res_usm = func(x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order) + return dpnp_array._create_from_usm_ndarray(res_usm) + + _divide_docstring_ = """ divide(x1, x2, out=None, order='K') @@ -80,11 +129,115 @@ def _call_divide(src1, src2, dst, sycl_queue, depends=[]): return vmi._div(sycl_queue, src1, src2, dst, depends) return ti._divide(src1, src2, dst, sycl_queue, depends) + def _call_divide_inplace(lhs, rhs, sycl_queue, depends=[]): + """In-place workaround until dpctl.tensor provides the functionality.""" + + # allocate temporary memory for out array + out = dpt.empty_like(lhs, dtype=dpnp.result_type(lhs.dtype, rhs.dtype)) + + # call a general callback + div_ht_, div_ev_ = _call_divide(lhs, rhs, out, sycl_queue, depends) + + # store the result into left input array and return events + cp_ht_, cp_ev_ = ti._copy_usm_ndarray_into_usm_ndarray(src=out, dst=lhs, sycl_queue=sycl_queue, depends=[div_ev_]) + dpctl.SyclEvent.wait_for([div_ht_]) + return (cp_ht_, cp_ev_) + + # dpctl.tensor only works with usm_ndarray or scalar + x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1) + x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2) + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + + func = BinaryElementwiseFunc("divide", ti._divide_result_type, _call_divide, + _divide_docstring_, _call_divide_inplace) + res_usm = func(x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order) + return dpnp_array._create_from_usm_ndarray(res_usm) + + +_multiply_docstring_ = """ +multiply(x1, x2, out=None, order='K') + +Calculates the product for each element `x1_i` of the input array `x1` +with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (dpnp.ndarray): + First input array, expected to have numeric data type. + x2 (dpnp.ndarray): + Second input array, also expected to have numeric data type. + out ({None, dpnp.ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", None, optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + dpnp.ndarray: + an array containing the result of element-wise multiplication. The data type + of the returned array is determined by the Type Promotion Rules. +""" + +def dpnp_multiply(x1, x2, out=None, order='K'): + """ + Invokes multiply() from dpctl.tensor implementation for multiply() function. + TODO: add a pybind11 extension of mul() from OneMKL VM where possible + and performance effective.
+ + """ + + # dpctl.tensor only works with usm_ndarray or scalar + x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1) + x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2) + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + + func = BinaryElementwiseFunc("multiply", ti._multiply_result_type, ti._multiply, + _multiply_docstring_, ti._multiply_inplace) + res_usm = func(x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order) + return dpnp_array._create_from_usm_ndarray(res_usm) + + +_subtract_docstring_ = """ +subtract(x1, x2, out=None, order='K') + +Calculates the difference between each element `x1_i` of the input +array `x1` and the respective element `x2_i` of the input array `x2`. + +Args: + x1 (dpnp.ndarray): + First input array, expected to have numeric data type. + x2 (dpnp.ndarray): + Second input array, also expected to have numeric data type. + out ({None, dpnp.ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", None, optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + dpnp.ndarray: + an array containing the result of element-wise subtraction. The data type + of the returned array is determined by the Type Promotion Rules. +""" + +def dpnp_subtract(x1, x2, out=None, order='K'): + """ + Invokes subtract() from dpctl.tensor implementation for subtract() function. + TODO: add a pybind11 extension of sub() from OneMKL VM where possible + and performance effective. + + """ + + # TODO: discuss with dpctl whether the check needs to be moved there + if not dpnp.isscalar(x1) and not dpnp.isscalar(x2) and x1.dtype == x2.dtype == dpnp.bool: + raise TypeError("DPNP boolean subtract, the `-` operator, is not supported, " + "use the bitwise_xor, the `^` operator, or the logical_xor function instead.") + # dpctl.tensor only works with usm_ndarray or scalar x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1) x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2) out_usm = None if out is None else dpnp.get_usm_ndarray(out) - func = BinaryElementwiseFunc("divide", ti._divide_result_type, _call_divide, _divide_docstring_) + func = BinaryElementwiseFunc("subtract", ti._subtract_result_type, ti._subtract, + _subtract_docstring_, ti._subtract_inplace) res_usm = func(x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order) return dpnp_array._create_from_usm_ndarray(res_usm) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 3a3d4027d787..158dfd03ba4b 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -99,15 +99,8 @@ def get_array(self): @property def T(self): - """Shape-reversed view of the array. - - If ndim < 2, then this is just a reference to the array itself.
- - """ - if self.ndim < 2: - return self - else: - return dpnp.transpose(self) + """View of the transposed array.""" + return self.transpose() def to_device(self, target_device): """ @@ -256,9 +249,15 @@ def __irshift__(self, other): dpnp.right_shift(self, other, out=self) return self - # '__isub__', + def __isub__(self, other): + dpnp.subtract(self, other, out=self) + return self + # '__iter__', - # '__itruediv__', + + def __itruediv__(self, other): + dpnp.true_divide(self, other, out=self) + return self def __ixor__(self, other): dpnp.bitwise_xor(self, other, out=self) @@ -795,12 +794,12 @@ def max(self, axis=None, out=None, keepdims=numpy._NoValue, initial=numpy._NoVal return dpnp.max(self, axis, out, keepdims, initial, where) - def mean(self, axis=None): + def mean(self, axis=None, **kwargs): """ Returns the average of the array elements. """ - return dpnp.mean(self, axis) + return dpnp.mean(self, axis=axis, **kwargs) def min(self, axis=None, out=None, keepdims=numpy._NoValue, initial=numpy._NoValue, where=numpy._NoValue): """ @@ -877,14 +876,21 @@ def prod(self, axis=None, dtype=None, out=None, keepdims=False, initial=None, wh # 'real', # 'repeat', - def reshape(self, d0, *dn, order=b'C'): + def reshape(self, *sh, **kwargs): """ Returns an array containing the same data with a new shape. - Refer to `dpnp.reshape` for full documentation. + For full documentation refer to :obj:`numpy.ndarray.reshape`. - .. seealso:: - :meth:`numpy.ndarray.reshape` + Returns + ------- + y : dpnp.ndarray + This will be a new view object if possible; + otherwise, it will be a copy. + + See Also + -------- + :obj:`dpnp.reshape` : Equivalent function. Notes ----- @@ -895,17 +901,9 @@ def reshape(self, d0, *dn, order=b'C'): """ - if dn: - if not isinstance(d0, int): - msg_tmpl = "'{}' object cannot be interpreted as an integer" - raise TypeError(msg_tmpl.format(type(d0).__name__)) - shape = [d0, *dn] - else: - shape = d0 - - shape_tup = dpnp.dpnp_utils._object_to_tuple(shape) - - return dpnp.reshape(self, shape_tup) + if len(sh) == 1: + sh = sh[0] + return dpnp.reshape(self, sh, **kwargs) # 'resize', @@ -947,14 +945,7 @@ def shape(self, newshape): """ - dpnp.reshape(self, newshape) - - @property - def shape(self): - """ - """ - - return self._array_obj.shape + dpnp.reshape(self, newshape=newshape) @property def size(self): @@ -993,7 +984,7 @@ def strides(self): return self._array_obj.strides - def sum(self, axis=None, dtype=None, out=None, keepdims=False, initial=0, where=True): + def sum(self, /, *, axis=None, dtype=None, keepdims=False, out=None, initial=0, where=True): """ Returns the sum along a given axis. @@ -1003,7 +994,7 @@ def sum(self, axis=None, dtype=None, out=None, keepdims=False, initial=0, where= """ - return dpnp.sum(self, axis, dtype, out, keepdims, initial, where) + return dpnp.sum(self, axis=axis, dtype=dtype, out=out, keepdims=keepdims, initial=initial, where=where) # 'swapaxes', @@ -1025,15 +1016,62 @@ def take(self, indices, axis=None, out=None, mode='raise'): def transpose(self, *axes): """ - Returns a view of the array with axes permuted. + Returns a view of the array with axes transposed. - .. seealso:: - :obj:`dpnp.transpose` for full documentation, - :meth:`numpy.ndarray.reshape` + For full documentation refer to :obj:`numpy.ndarray.transpose`. + + Returns + ------- + y : dpnp.ndarray + View of the array with its axes suitably permuted. + + See Also + -------- + :obj:`dpnp.transpose` : Equivalent function. 
+ :obj:`dpnp.ndarray.ndarray.T` : Array property returning the array transposed. + :obj:`dpnp.ndarray.reshape` : Give a new shape to an array without changing its data. + + Examples + -------- + >>> import dpnp as dp + >>> a = dp.array([[1, 2], [3, 4]]) + >>> a + array([[1, 2], + [3, 4]]) + >>> a.transpose() + array([[1, 3], + [2, 4]]) + >>> a.transpose((1, 0)) + array([[1, 3], + [2, 4]]) + + >>> a = dp.array([1, 2, 3, 4]) + >>> a + array([1, 2, 3, 4]) + >>> a.transpose() + array([1, 2, 3, 4]) """ - return dpnp.transpose(self, axes) + ndim = self.ndim + if ndim < 2: + return self + + axes_len = len(axes) + if axes_len == 1 and isinstance(axes[0], tuple): + axes = axes[0] + + res = self.__new__(dpnp_array) + if ndim == 2 and axes_len == 0: + res._array_obj = self._array_obj.T + else: + if len(axes) == 0 or axes[0] is None: + # self.transpose().shape == self.shape[::-1] + # self.transpose(None).shape == self.shape[::-1] + axes = tuple((ndim - x - 1) for x in range(ndim)) + + res._array_obj = dpt.permute_dims(self._array_obj, axes) + return res def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index 2a643fc8469b..f26f9648b24f 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -73,7 +73,7 @@ def dot(x1, x2, out=None, **kwargs): y : dpnp.ndarray Returns the dot product of `x1` and `x2`. If `out` is given, then it is returned. - + Limitations ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` @@ -106,9 +106,10 @@ def dot(x1, x2, out=None, **kwargs): # get USM type and queue to copy scalar from the host memory into a USM allocation usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + # TODO: copy_when_strides=False (now it's done for faster implementation with transpose arrays) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=True, copy_when_nondefault_queue=False, alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=True, copy_when_nondefault_queue=False, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: if out is not None: @@ -297,7 +298,7 @@ def matmul(x1, x2, out=None, **kwargs): return call_origin(numpy.matmul, x1, x2, out=out, **kwargs) -def outer(x1, x2, **kwargs): +def outer(x1, x2, out=None): """ Returns the outer product of two arrays. @@ -305,8 +306,8 @@ def outer(x1, x2, **kwargs): Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as :obj:`dpnp.ndarray`. - Keyword arguments ``kwargs`` are currently unsupported. + Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. Otherwise the functions will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
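The reworked `outer` no longer routes through a dedicated backend kernel: as the implementation hunk below shows, it flattens any non-scalar input and hands a column-shaped and a row-shaped operand to `dpnp.multiply`. A minimal sketch of that broadcasting identity, written against plain NumPy purely for illustration (the helper name `outer_via_multiply` is assumed here and is not part of dpnp):

```python
import numpy as np

def outer_via_multiply(x1, x2):
    # Flatten both inputs, then multiply a column vector by a row vector;
    # broadcasting a (M, 1) array against a (1, N) array yields the full
    # (M, N) outer product, so no separate outer kernel is required.
    a = np.asarray(x1).reshape(-1)[:, None]   # shape (M, 1)
    b = np.asarray(x2).reshape(-1)[None, :]   # shape (1, N)
    return np.multiply(a, b)                  # shape (M, N)

a = np.array([1, 1, 1])
b = np.array([1, 2, 3])
print(outer_via_multiply(a, b))
# [[1 2 3]
#  [1 2 3]
#  [1 2 3]]
assert np.array_equal(outer_via_multiply(a, b), np.outer(a, b))
```

Because the column/row broadcast already produces the complete outer product, the dpnp version only has to special-case scalar operands before delegating to `dpnp.multiply`, as the hunk below does.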
@@ -322,21 +323,26 @@ def outer(x1, x2, **kwargs): >>> b = np.array([1, 2, 3]) >>> result = np.outer(a, b) >>> [x for x in result] - [1, 2, 3, 1, 2, 3, 1, 2, 3] + array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3]]) """ + x1_is_scalar = dpnp.isscalar(x1) + x2_is_scalar = dpnp.isscalar(x2) - if not kwargs: - if isinstance(x1, dpnp_array) and isinstance(x2, dpnp_array): - ravel = lambda x: x.flatten() if x.ndim > 1 else x - return ravel(x1)[:, None] * ravel(x2)[None, :] - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) - if x1_desc and x2_desc: - return dpnp_outer(x1_desc, x2_desc).get_pyobj() + if x1_is_scalar and x2_is_scalar: + pass + elif not (x1_is_scalar or dpnp.is_supported_array_type(x1)): + pass + elif not (x2_is_scalar or dpnp.is_supported_array_type(x2)): + pass + else: + x1_in = x1 if x1_is_scalar else (x1.reshape(-1) if x1.ndim > 1 else x1)[:, None] + x2_in = x2 if x2_is_scalar else (x2.reshape(-1) if x2.ndim > 1 else x2)[None, :] + return dpnp.multiply(x1_in, x2_in, out=out) - return call_origin(numpy.outer, x1, x2, **kwargs) + return call_origin(numpy.outer, x1, x2, out=out) def tensordot(x1, x2, axes=2): diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index b317a0a9a11a..1ca879abe1aa 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -67,7 +67,9 @@ "ravel", "repeat", "reshape", + "result_type", "rollaxis", + "shape", "squeeze", "stack", "swapaxes", @@ -501,26 +503,124 @@ def repeat(x1, repeats, axis=None): return call_origin(numpy.repeat, x1, repeats, axis) -def reshape(x1, newshape, order='C'): +def reshape(x, /, newshape, order='C', copy=None): """ Gives a new shape to an array without changing its data. For full documentation refer to :obj:`numpy.reshape`. + Parameters + ---------- + x : {dpnp_array, usm_ndarray} + Array to be reshaped. + newshape : int or tuple of ints + The new shape should be compatible with the original shape. If + an integer, then the result will be a 1-D array of that length. + One shape dimension can be -1. In this case, the value is + inferred from the length of the array and remaining dimensions. + order : {'C', 'F'}, optional + Read the elements of `x` using this index order, and place the + elements into the reshaped array using this index order. 'C' + means to read / write the elements using C-like index order, + with the last axis index changing fastest, back to the first + axis index changing slowest. 'F' means to read / write the + elements using Fortran-like index order, with the first index + changing fastest, and the last index changing slowest. Note that + the 'C' and 'F' options take no account of the memory layout of + the underlying array, and only refer to the order of indexing. + copy : bool, optional + Boolean indicating whether or not to copy the input array. + If ``True``, the result array will always be a copy of input `x`. + If ``False``, the result array can never be a copy + and a ValueError exception will be raised in case the copy is necessary. + If ``None``, the result array will reuse existing memory buffer of `x` + if possible and copy otherwise. Default: None. + + Returns + ------- + y : dpnp.ndarray + This will be a new view object if possible; otherwise, it will + be a copy. Note there is no guarantee of the *memory layout* (C- or + Fortran- contiguous) of the returned array. + Limitations ----------- - Only 'C' order is supported. 
+ Parameter `order` is supported only with values ``"C"`` and ``"F"``. + + See Also + -------- + :obj:`dpnp.ndarray.reshape` : Equivalent method. + + Examples + -------- + >>> import dpnp as dp + >>> a = dp.array([[1, 2, 3], [4, 5, 6]]) + >>> dp.reshape(a, 6) + array([1, 2, 3, 4, 5, 6]) + >>> dp.reshape(a, 6, order='F') + array([1, 4, 2, 5, 3, 6]) + + >>> dp.reshape(a, (3, -1)) # the unspecified value is inferred to be 2 + array([[1, 2], + [3, 4], + [5, 6]]) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if order != 'C': - pass - else: - return dpnp_reshape(x1_desc, newshape, order).get_pyobj() + if newshape is None: + newshape = x.shape + + if order is None: + order = 'C' + elif not order in "cfCF": + raise ValueError(f"order must be one of 'C' or 'F' (got {order})") + + usm_arr = dpnp.get_usm_ndarray(x) + usm_arr = dpt.reshape(usm_arr, shape=newshape, order=order, copy=copy) + return dpnp_array._create_from_usm_ndarray(usm_arr) - return call_origin(numpy.reshape, x1, newshape, order) + +def result_type(*arrays_and_dtypes): + """ + Returns the type that results from applying the NumPy + type promotion rules to the arguments. + + For full documentation refer to :obj:`numpy.result_type`. + + Parameters + ---------- + arrays_and_dtypes : list of arrays and dtypes + An arbitrary length sequence of arrays or dtypes. + + Returns + ------- + out : dtype + The result type. + + Limitations + ----------- + An array in the input list is supported as either :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. + + Examples + -------- + >>> import dpnp as dp + >>> dp.result_type(dp.arange(3, dtype=dp.int64), dp.arange(7, dtype=dp.int32)) + dtype('int64') + + >>> dp.result_type(dp.int64, dp.complex128) + dtype('complex128') + + >>> dp.result_type(dp.ones(10, dtype=dp.float32), dp.float64) + dtype('float64') + + """ + + usm_arrays_and_dtypes = [ + X.dtype if isinstance(X, (dpnp_array, dpt.usm_ndarray)) else X + for X in arrays_and_dtypes + ] + return dpt.result_type(*usm_arrays_and_dtypes) def rollaxis(x1, axis, start=0): @@ -571,6 +671,49 @@ def rollaxis(x1, axis, start=0): return call_origin(numpy.rollaxis, x1, axis, start) +def shape(a): + """ + Return the shape of an array. + + For full documentation refer to :obj:`numpy.shape`. + + Parameters + ---------- + a : array_like + Input array. + + Returns + ------- + shape : tuple of ints + The elements of the shape tuple give the lengths of the + corresponding array dimensions. + + See Also + -------- + len : ``len(a)`` is equivalent to ``np.shape(a)[0]`` for N-D arrays with + ``N>=1``. + :obj:`dpnp.ndarray.shape` : Equivalent array method. + + Examples + -------- + >>> import dpnp as dp + >>> dp.shape(dp.eye(3)) + (3, 3) + >>> dp.shape([[1, 3]]) + (1, 2) + >>> dp.shape([0]) + (1,) + >>> dp.shape(0) + () + + """ + + if dpnp.is_supported_array_type(a): + return a.shape + else: + return numpy.shape(a) + + def squeeze(x, /, axis=None): """ Removes singleton dimensions (axes) from array `x`. @@ -673,54 +816,65 @@ def swapaxes(x1, axis1, axis2): return call_origin(numpy.swapaxes, x1, axis1, axis2) -def transpose(x1, axes=None): +def transpose(a, axes=None): """ - Reverse or permute the axes of an array; returns the modified array. + Returns an array with axes transposed. For full documentation refer to :obj:`numpy.transpose`. + Returns + ------- + y : dpnp.ndarray + `a` with its axes permuted. A view is returned whenever possible. 
+ Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Otherwise the function will be executed sequentially on CPU. - Value of the parameter ``axes`` likely to be replaced with ``None``. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array is supported as either :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. See Also -------- + :obj:`dpnp.ndarray.transpose` : Equivalent method. :obj:`dpnp.moveaxis` : Move array axes to new positions. :obj:`dpnp.argsort` : Returns the indices that would sort an array. Examples -------- - >>> import dpnp as np - >>> x = np.arange(4).reshape((2,2)) - >>> x.shape - (2, 2) - >>> [i for i in x] - [0, 1, 2, 3] - >>> out = np.transpose(x) - >>> out.shape - (2, 2) - >>> [i for i in out] - [0, 2, 1, 3] - - """ - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if axes is not None: - if not any(axes): - """ - pytest tests/third_party/cupy/manipulation_tests/test_transpose.py - """ - axes = None - - result = dpnp_transpose(x1_desc, axes).get_pyobj() - - return result - - return call_origin(numpy.transpose, x1, axes=axes) + >>> import dpnp as dp + >>> a = dp.array([[1, 2], [3, 4]]) + >>> a + array([[1, 2], + [3, 4]]) + >>> dp.transpose(a) + array([[1, 3], + [2, 4]]) + + >>> a = dp.array([1, 2, 3, 4]) + >>> a + array([1, 2, 3, 4]) + >>> dp.transpose(a) + array([1, 2, 3, 4]) + + >>> a = dp.ones((1, 2, 3)) + >>> dp.transpose(a, (1, 0, 2)).shape + (2, 1, 3) + + >>> a = dp.ones((2, 3, 4, 5)) + >>> dp.transpose(a).shape + (5, 4, 3, 2) + + """ + + if isinstance(a, dpnp_array): + array = a + elif isinstance(a, dpt.usm_ndarray): + array = dpnp_array._create_from_usm_ndarray(a.get_array()) + else: + raise TypeError("An array must be any of supported type, but got {}".format(type(a))) + + if axes is None: + return array.transpose() + return array.transpose(*axes) def unique(x1, **kwargs): diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 9e877703b409..49f839493f2d 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -42,11 +42,15 @@ from .dpnp_algo import * from .dpnp_algo.dpnp_elementwise_common import ( - dpnp_divide + dpnp_add, + dpnp_divide, + dpnp_multiply, + dpnp_subtract ) from .dpnp_utils import * import dpnp +from dpnp.dpnp_array import dpnp_array import numpy import dpctl.tensor as dpt @@ -98,8 +102,12 @@ ] -def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, dtype=None, subok=True, **kwargs): - """Choose function to call based on input and call chosen fucntion.""" +def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, order='K', dtype=None, subok=True, **kwargs): + """ + Chooses a common internal elementwise function to call in DPNP based on input arguments + or to fallback on NumPy call if any passed argument is not currently supported. 
+ + """ if kwargs: pass @@ -113,24 +121,15 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, dtype=N # at least either x1 or x2 has to be an array pass else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - if out is not None: - if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): - raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None - else: - out_desc = None - - return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() + if order in "afkcAFKC": + order = order.upper() + elif order is None: + order = 'K' + else: + raise ValueError("order must be one of 'C', 'F', 'A', or 'K' (got '{}')".format(order)) - return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return dpnp_func(x1, x2, out=out, order=order) + return call_origin(origin_func, x1, x2, out=out, where=where, order=order, dtype=dtype, subok=subok, **kwargs) def abs(*args, **kwargs): @@ -175,7 +174,7 @@ def absolute(x, ------- y : dpnp.ndarray An array containing the absolute value of each element in `x`. - + Limitations ----------- Parameters `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. @@ -221,6 +220,7 @@ def add(x1, out=None, *, where=True, + order='K', dtype=None, subok=True, **kwargs): @@ -254,7 +254,7 @@ def add(x1, """ - return _check_nd_call(numpy.add, dpnp_add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.add, dpnp_add, x1, x2, out=out, where=where, order=order, dtype=dtype, subok=subok, **kwargs) def around(x1, decimals=0, out=None): @@ -602,7 +602,7 @@ def divide(x1, ------- y : dpnp.ndarray The quotient ``x1/x2``, element-wise. 
- + Limitations ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` @@ -621,27 +621,7 @@ def divide(x1, """ - if where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif kwargs: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - if order in "afkcAFKC": - order = order.upper() - elif order is None: - order = 'K' - else: - raise ValueError("order must be one of 'C', 'F', 'A', or 'K' (got '{}')".format(order)) - - return dpnp_divide(x1, x2, out=out, order=order) - return call_origin(numpy.divide, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.divide, dpnp_divide, x1, x2, out=out, where=where, order=order, dtype=dtype, subok=subok, **kwargs) def ediff1d(x1, to_end=None, to_begin=None): @@ -1140,6 +1120,7 @@ def multiply(x1, out=None, *, where=True, + order='K', dtype=None, subok=True, **kwargs): @@ -1172,7 +1153,7 @@ def multiply(x1, """ - return _check_nd_call(numpy.multiply, dpnp_multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.multiply, dpnp_multiply, x1, x2, out=out, where=where, order=order, dtype=dtype, subok=subok, **kwargs) def nancumprod(x1, **kwargs): @@ -1362,7 +1343,7 @@ def power(x1, ------- y : dpnp.ndarray The bases in `x1` raised to the exponents in `x2`. - + Limitations ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` @@ -1390,7 +1371,36 @@ def power(x1, """ - return _check_nd_call(numpy.power, dpnp_power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + if kwargs: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + if out is not None: + if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): + raise TypeError("return array must be of supported array type") + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None + else: + out_desc = None + + return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() + + return call_origin(numpy.power, x1, x2, dtype=dtype, out=out, where=where, **kwargs) def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True): @@ -1546,6 +1556,7 @@ def subtract(x1, out=None, *, where=True, + order='K', dtype=None, subok=True, **kwargs): @@ -1558,7 +1569,7 @@ def subtract(x1, ------- y : dpnp.ndarray The difference of `x1` and `x2`, element-wise. 
- + Limitations ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` @@ -1577,73 +1588,55 @@ def subtract(x1, """ - if out is not None: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - if x1_desc.dtype == x2_desc.dtype == dpnp.bool: - raise TypeError("DPNP boolean subtract, the `-` operator, is not supported, " - "use the bitwise_xor, the `^` operator, or the logical_xor function instead.") - return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - - return call_origin(numpy.subtract, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.subtract, dpnp_subtract, x1, x2, out=out, where=where, order=order, dtype=dtype, subok=subok, **kwargs) -def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True): +def sum(x, /, *, axis=None, dtype=None, keepdims=False, out=None, initial=0, where=True): """ Sum of array elements over a given axis. For full documentation refer to :obj:`numpy.sum`. + Returns + ------- + y : dpnp.ndarray + an array containing the sums. If the sum was computed over the + entire array, a zero-dimensional array is returned. The returned + array has the data type as described in the `dtype` parameter + of the Python Array API standard for the `sum` function. + Limitations ----------- - Parameter `where`` is unsupported. - Input array data types are limited by DPNP :ref:`Data types`. + Parameters `x` is supported as either :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. + Parameters `out`, `initial` and `where` are supported with their default values. + Otherwise the function will be executed sequentially on CPU. + Input array data types are limited by supported DPNP :ref:`Data types`. Examples -------- >>> import dpnp as np >>> np.sum(np.array([1, 2, 3, 4, 5])) - 15 - >>> result = np.sum([[0, 1], [0, 5]], axis=0) - [0, 6] + array(15) + >>> np.sum(np.array(5)) + array(5) + >>> result = np.sum(np.array([[0, 1], [0, 5]]), axis=0) + array([0, 6]) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if where is not True: - pass - else: - if dpnp.isscalar(out): - raise TypeError("output must be an array") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - result_obj = dpnp_sum(x1_desc, axis, dtype, out_desc, keepdims, initial, where).get_pyobj() - result = dpnp.convert_single_elem_array_to_scalar(result_obj, keepdims) - if x1_desc.size == 0 and axis is None: - result = dpnp.zeros_like(result) - if out is not None: - out[...] 
= result - return result + if out is not None: + pass + elif initial != 0: + pass + elif where is not True: + pass + else: + y = dpt.sum(dpnp.get_usm_ndarray(x), axis=axis, dtype=dtype, keepdims=keepdims) + return dpnp_array._create_from_usm_ndarray(y) - return call_origin(numpy.sum, x1, axis=axis, dtype=dtype, out=out, keepdims=keepdims, initial=initial, where=where) + return call_origin(numpy.sum, x, axis=axis, dtype=dtype, out=out, keepdims=keepdims, initial=initial, where=where) def trapz(y1, x1=None, dx=1.0, axis=-1): diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 966a72142695..45a3757d1a35 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -42,8 +42,12 @@ import numpy import dpctl.tensor as dpt +from numpy.core.numeric import normalize_axis_tuple from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * +from dpnp.dpnp_utils.dpnp_utils_statistics import ( + dpnp_cov +) from dpnp.dpnp_array import dpnp_array import dpnp @@ -237,13 +241,18 @@ def correlate(x1, x2, mode='valid'): return call_origin(numpy.correlate, x1, x2, mode=mode) -def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None): - """cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None): +def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None): + """cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None): Estimate a covariance matrix, given data and weights. For full documentation refer to :obj:`numpy.cov`. + Returns + ------- + out : dpnp.ndarray + The covariance matrix of the variables. + Limitations ----------- Input array ``m`` is supported as :obj:`dpnp.ndarray`. @@ -257,7 +266,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. - .. see also:: :obj:`dpnp.corrcoef` normalized covariance matrix. 
+ See Also + -------- + :obj:`dpnp.corrcoef` : Normalized covariance matrix Examples -------- @@ -274,11 +285,10 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= [1.0, -1.0, -1.0, 1.0] """ - if not isinstance(x1, (dpnp_array, dpt.usm_ndarray)): - pass - elif x1.ndim > 2: + + if not isinstance(m, (dpnp_array, dpt.usm_ndarray)): pass - elif y is not None: + elif m.ndim > 2: pass elif bias: pass @@ -289,18 +299,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= elif aweights is not None: pass else: - if not rowvar and x1.shape[0] != 1: - x1 = x1.get_array() if isinstance(x1, dpnp_array) else x1 - x1 = dpnp_array._create_from_usm_ndarray(x1.mT) - - if not x1.dtype in (dpnp.float32, dpnp.float64): - x1 = dpnp.astype(x1, dpnp.default_float_type(sycl_queue=x1.sycl_queue)) + return dpnp_cov(m, y=y, rowvar=rowvar, dtype=dtype) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - return dpnp_cov(x1_desc).get_pyobj() - - return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights) + return call_origin(numpy.cov, m, y, rowvar, bias, ddof, fweights, aweights, dtype=dtype) def histogram(a, bins=10, range=None, density=None, weights=None): @@ -395,18 +396,23 @@ def max(x1, axis=None, out=None, keepdims=False, initial=None, where=True): return call_origin(numpy.max, x1, axis, out, keepdims, initial, where) -def mean(x1, axis=None, **kwargs): +def mean(x, /, *, axis=None, dtype=None, keepdims=False, out=None, where=True): """ Compute the arithmetic mean along the specified axis. For full documentation refer to :obj:`numpy.mean`. + Returns + ------- + y : dpnp.ndarray + an array containing the mean values of the elements along the specified axis(axes). + If the input array is empty, an array containing a single NaN value is returned. + Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Prameters ``axis`` is supported only with default value ``None``. - Keyword arguments ``kwargs`` are currently unsupported. - Size of input array is limited by ``x1.size > 0``. + Parameters `x` is supported as either :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. + Parameters `keepdims`, `out` and `where` are supported with their default values. Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
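The new `mean` is built on top of `dpnp.sum`: the hunk below normalizes the requested axes, sums over them, and divides by the number of reduced elements. A minimal NumPy-level sketch of the same arithmetic, assuming only stock NumPy and the `normalize_axis_tuple` helper that the hunk itself imports (`mean_via_sum` is an illustrative name, not a dpnp function):

```python
import numpy as np
from numpy.core.numeric import normalize_axis_tuple

def mean_via_sum(x, axis=None, dtype=None):
    # Same reduction arithmetic as the dpnp hunk below: sum over the
    # requested axes, then divide by the count of reduced elements.
    if axis is None:
        return np.sum(x, dtype=dtype) / x.size

    if not isinstance(axis, (tuple, list)):
        axis = (axis,)
    # Turn negative axes into non-negative ones and validate them.
    axis = normalize_axis_tuple(axis, x.ndim, "axis")

    count = 1
    for ax in axis:
        count *= x.shape[ax]
    return np.sum(x, axis=axis, dtype=dtype) / count

a = np.array([[1, 2], [3, 4]])
print(mean_via_sum(a))          # 2.5
print(mean_via_sum(a, axis=0))  # [2. 3.]
print(mean_via_sum(a, axis=1))  # [1.5 3.5]
```

Dividing the summed values by the product of the reduced dimensions reproduces the mean for the default settings; the dpnp hunk additionally keeps the division out-of-place for boolean and integer inputs, where an in-place divide would conflict with the floating-point result type.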
@@ -427,23 +433,52 @@ def mean(x1, axis=None, **kwargs): >>> import dpnp as np >>> a = np.array([[1, 2], [3, 4]]) >>> np.mean(a) - 2.5 - + array(2.5) + >>> np.mean(a, axis=0) + array([2., 3.]) + >>> np.mean(a, axis=1) + array([1.5, 3.5]) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc and not kwargs: - if x1_desc.size == 0: - pass - elif axis is not None: - pass + if keepdims is not False: + pass + elif out is not None: + pass + elif where is not True: + pass + else: + if dtype is None and dpnp.issubdtype(x.dtype, dpnp.inexact): + dtype = x.dtype + + if axis is None: + if x.size == 0: + return dpnp.array(dpnp.nan, dtype=dtype) + else: + result = dpnp.sum(x, dtype=dtype) / x.size + return result.astype(dtype) if result.dtype != dtype else result + + if not isinstance(axis,(tuple,list)): + axis = (axis,) + + axis = normalize_axis_tuple(axis, x.ndim, "axis") + res_sum = dpnp.sum(x, axis=axis, dtype=dtype) + + del_ = 1.0 + for axis_value in axis: + del_ *= x.shape[axis_value] + + #performing an inplace operation on arrays of bool or integer types + #is not possible due to incompatible data types because + #it returns a floating value + if dpnp.issubdtype(res_sum.dtype, dpnp.inexact): + res_sum /= del_ else: - result_obj = dpnp_mean(x1_desc, axis) - result = dpnp.convert_single_elem_array_to_scalar(result_obj) + new_res_sum = res_sum / del_ + return new_res_sum.astype(dtype) if new_res_sum.dtype != dtype else new_res_sum - return result + return res_sum.astype(dtype) if res_sum.dtype != dtype else res_sum - return call_origin(numpy.mean, x1, axis=axis, **kwargs) + return call_origin(numpy.mean, x, axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where) def median(x1, axis=None, out=None, overwrite_input=False, keepdims=False): diff --git a/dpnp/dpnp_utils/dpnp_utils_statistics.py b/dpnp/dpnp_utils/dpnp_utils_statistics.py new file mode 100644 index 000000000000..9e49655e9eef --- /dev/null +++ b/dpnp/dpnp_utils/dpnp_utils_statistics.py @@ -0,0 +1,117 @@ +# cython: language_level=3 +# distutils: language = c++ +# -*- coding: utf-8 -*- +# ***************************************************************************** +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + + +import dpnp +from dpnp.dpnp_array import dpnp_array +from dpnp.dpnp_utils import ( + get_usm_allocations +) + +import dpctl +import dpctl.tensor as dpt +import dpctl.tensor._tensor_impl as ti + + +__all__ = [ + "dpnp_cov" +] + +def dpnp_cov(m, y=None, rowvar=True, dtype=None): + """ + Estimate a covariance matrix based on passed data. + No support for given weights is provided now. + + The implementation is done through existing dpnp and dpctl methods + instead of separate function call of dpnp backend. + + """ + + def _get_2dmin_array(x, dtype): + """ + Transform an input array to a form required for building a covariance matrix. + + If applicable, it reshapes the input array to have 2 dimensions or greater. + If applicable, it transposes the input array when 'rowvar' is False. + It casts to another dtype, if the input array differs from requested one. + + """ + + if x.ndim == 0: + x = x.reshape((1, 1)) + elif x.ndim == 1: + x = x[dpnp.newaxis, :] + + if not rowvar and x.shape[0] != 1: + x = x.T + + if x.dtype != dtype: + x = dpnp.astype(x, dtype) + return x + + + # input arrays must follow CFD paradigm + usm_type, queue = get_usm_allocations((m, ) if y is None else (m, y)) + + # calculate a type of result array if not passed explicitly + if dtype is None: + dtypes = [m.dtype, dpnp.default_float_type(sycl_queue=queue)] + if y is not None: + dtypes.append(y.dtype) + dtype = dpt.result_type(*dtypes) + + X = _get_2dmin_array(m, dtype) + if y is not None: + y = _get_2dmin_array(y, dtype) + + # TODO: replace with dpnp.concatenate((X, y), axis=0) once dpctl implementation is ready + if X.ndim != y.ndim: + raise ValueError("all the input arrays must have same number of dimensions") + + if X.shape[1:] != y.shape[1:]: + raise ValueError("all the input array dimensions for the concatenation axis must match exactly") + + res_shape = tuple(X.shape[i] if i > 0 else (X.shape[i] + y.shape[i]) for i in range(X.ndim)) + res_usm = dpt.empty(res_shape, dtype=dtype, usm_type=usm_type, sycl_queue=queue) + + # concatenate input arrays 'm' and 'y' into single array among 0-axis + hev1, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=X.get_array(), dst=res_usm[:X.shape[0]], sycl_queue=queue) + hev2, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=y.get_array(), dst=res_usm[X.shape[0]:], sycl_queue=queue) + dpctl.SyclEvent.wait_for([hev1, hev2]) + + X = dpnp_array._create_from_usm_ndarray(res_usm) + + avg = X.mean(axis=1) + + fact = X.shape[1] - 1 + X -= avg[:, None] + + c = dpnp.dot(X, X.T.conj()) + c *= 1 / fact if fact != 0 else dpnp.nan + + return dpnp.squeeze(c) diff --git a/dpnp/random/dpnp_algo_random.pyx b/dpnp/random/dpnp_algo_random.pyx index 504e365405b4..d0a400a0967f 100644 --- a/dpnp/random/dpnp_algo_random.pyx +++ b/dpnp/random/dpnp_algo_random.pyx @@ -42,7 +42,7 @@ import dpnp.config as config from dpnp.dpnp_array import dpnp_array from libc.stdlib cimport free, malloc -from libc.stdint cimport uint32_t, int64_t +from libc.stdint cimport uint32_t, uint64_t, int64_t from dpnp.dpnp_algo cimport * cimport dpctl as c_dpctl @@ -53,6 +53,7 @@ cimport numpy __all__ = [ + "MCG59", "MT19937", "dpnp_rng_beta", "dpnp_rng_binomial", @@ -271,28 +272,28 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_zipf_c_1out_t)(c_dpctl.DPCTLSy cdef extern from "dpnp_random_state.hpp": + cdef struct engine_struct: + pass + cdef struct mt19937_struct: pass void MT19937_InitScalarSeed(mt19937_struct *, c_dpctl.DPCTLSyclQueueRef, uint32_t) 
void MT19937_InitVectorSeed(mt19937_struct *, c_dpctl.DPCTLSyclQueueRef, uint32_t *, unsigned int) void MT19937_Delete(mt19937_struct *) + cdef struct mcg59_struct: + pass + void MCG59_InitScalarSeed(mcg59_struct *, c_dpctl.DPCTLSyclQueueRef, uint64_t) + void MCG59_Delete(mcg59_struct *) -cdef class MT19937: - """ - Class storing MKL engine for MT199374x32x10 algorithm. - """ - cdef mt19937_struct mt19937 +cdef class _Engine: + cdef engine_struct* engine_base cdef c_dpctl.DPCTLSyclQueueRef q_ref cdef c_dpctl.SyclQueue q def __cinit__(self, seed, sycl_queue): - cdef bint is_vector_seed = False - cdef uint32_t scalar_seed = 0 - cdef unsigned int vector_seed_len = 0 - cdef unsigned int *vector_seed = NULL - + self.engine_base = NULL self.q_ref = NULL if sycl_queue is None: raise ValueError("SyclQueue isn't defined") @@ -303,78 +304,28 @@ cdef class MT19937: if self.q_ref is NULL: raise ValueError("SyclQueue copy failed") - # get a scalar seed value or a vector of seeds - if self.is_integer(seed): - if self.is_uint_range(seed): - scalar_seed = seed - else: - raise ValueError("Seed must be between 0 and 2**32 - 1") - elif isinstance(seed, (list, tuple, range, numpy.ndarray, dpnp_array)): - if len(seed) == 0: - raise ValueError("Seed must be non-empty") - elif numpy.ndim(seed) > 1: - raise ValueError("Seed array must be 1-d") - elif not all([self.is_integer(item) for item in seed]): - raise TypeError("Seed must be a sequence of unsigned int elements") - elif not all([self.is_uint_range(item) for item in seed]): - raise ValueError("Seed must be between 0 and 2**32 - 1") - else: - is_vector_seed = True - vector_seed_len = len(seed) - if vector_seed_len > 3: - raise ValueError( - f"{vector_seed_len} length of seed vector isn't supported, " - "the length is limited by 3") - - vector_seed = malloc(vector_seed_len * sizeof(uint32_t)) - if (not vector_seed): - raise MemoryError(f"Could not allocate memory for seed vector of length {vector_seed_len}") - - # convert input seed's type to uint32_t one (expected in MKL function) - try: - for i in range(vector_seed_len): - vector_seed[i] = seed[i] - except (ValueError, TypeError) as e: - free(vector_seed) - raise e - else: - raise TypeError("Seed must be an unsigned int, or a sequence of unsigned int elements") - - if is_vector_seed: - MT19937_InitVectorSeed(&self.mt19937, self.q_ref, vector_seed, vector_seed_len) - free(vector_seed) - else: - MT19937_InitScalarSeed(&self.mt19937, self.q_ref, scalar_seed) - - def __dealloc__(self): - MT19937_Delete(&self.mt19937) + self.engine_base = NULL c_dpctl.DPCTLQueue_Delete(self.q_ref) - cdef bint is_integer(self, value): if isinstance(value, numbers.Number): return isinstance(value, int) or isinstance(value, dpnp.integer) # cover an element of dpnp array: return numpy.ndim(value) == 0 and hasattr(value, "dtype") and dpnp.issubdtype(value, dpnp.integer) + cdef void set_engine(self, engine_struct* engine): + self.engine_base = engine - cdef bint is_uint_range(self, value): - return value >= 0 and value <= numpy.iinfo(numpy.uint32).max - - - cdef mt19937_struct * get_mt19937(self): - return &self.mt19937 - + cdef engine_struct* get_engine(self): + return self.engine_base cdef c_dpctl.SyclQueue get_queue(self): return self.q - cdef c_dpctl.DPCTLSyclQueueRef get_queue_ref(self): return self.q_ref - cpdef utils.dpnp_descriptor normal(self, loc, scale, size, dtype, usm_type): cdef shape_type_c result_shape cdef utils.dpnp_descriptor result @@ -403,14 +354,13 @@ cdef class MT19937: func = kernel_data.ptr # call FPTR function 
- event_ref = func(self.get_queue_ref(), result.get_data(), loc, scale, result.size, self.get_mt19937(), NULL) + event_ref = func(self.get_queue_ref(), result.get_data(), loc, scale, result.size, self.get_engine(), NULL) if event_ref != NULL: with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result - cpdef utils.dpnp_descriptor uniform(self, low, high, size, dtype, usm_type): cdef shape_type_c result_shape cdef utils.dpnp_descriptor result @@ -439,7 +389,7 @@ cdef class MT19937: func = kernel_data.ptr # call FPTR function - event_ref = func(self.get_queue_ref(), result.get_data(), low, high, result.size, self.get_mt19937(), NULL) + event_ref = func(self.get_queue_ref(), result.get_data(), low, high, result.size, self.get_engine(), NULL) if event_ref != NULL: with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) @@ -447,6 +397,102 @@ cdef class MT19937: return result +cdef class MT19937(_Engine): + """ + Class storing MKL engine for MT199374x32x10 (The Mersenne Twister pseudorandom number generator). + + """ + + cdef mt19937_struct mt19937 + + def __cinit__(self, seed, sycl_queue): + cdef bint is_vector_seed = False + cdef uint32_t scalar_seed = 0 + cdef unsigned int vector_seed_len = 0 + cdef unsigned int *vector_seed = NULL + + # get a scalar seed value or a vector of seeds + if self.is_integer(seed): + if self.is_uint_range(seed): + scalar_seed = seed + else: + raise ValueError("Seed must be between 0 and 2**32 - 1") + elif isinstance(seed, (list, tuple, range, numpy.ndarray, dpnp_array)): + if len(seed) == 0: + raise ValueError("Seed must be non-empty") + elif numpy.ndim(seed) > 1: + raise ValueError("Seed array must be 1-d") + elif not all([self.is_integer(item) for item in seed]): + raise TypeError("Seed must be a sequence of unsigned int elements") + elif not all([self.is_uint_range(item) for item in seed]): + raise ValueError("Seed must be between 0 and 2**32 - 1") + else: + is_vector_seed = True + vector_seed_len = len(seed) + if vector_seed_len > 3: + raise ValueError( + f"{vector_seed_len} length of seed vector isn't supported, " + "the length is limited by 3") + + vector_seed = malloc(vector_seed_len * sizeof(uint32_t)) + if (not vector_seed): + raise MemoryError(f"Could not allocate memory for seed vector of length {vector_seed_len}") + + # convert input seed's type to uint32_t one (expected in MKL function) + try: + for i in range(vector_seed_len): + vector_seed[i] = seed[i] + except (ValueError, TypeError) as e: + free(vector_seed) + raise e + else: + raise TypeError("Seed must be an unsigned int, or a sequence of unsigned int elements") + + if is_vector_seed: + MT19937_InitVectorSeed(&self.mt19937, self.q_ref, vector_seed, vector_seed_len) + free(vector_seed) + else: + MT19937_InitScalarSeed(&self.mt19937, self.q_ref, scalar_seed) + self.set_engine( &self.mt19937) + + def __dealloc__(self): + MT19937_Delete(&self.mt19937) + + cdef bint is_uint_range(self, value): + return value >= 0 and value <= numpy.iinfo(numpy.uint32).max + + +cdef class MCG59(_Engine): + """ + Class storing MKL engine for MCG59 + (the 59-bit multiplicative congruential pseudorandom number generator). 
+ + """ + + cdef mcg59_struct mcg59 + + def __cinit__(self, seed, sycl_queue): + cdef uint64_t scalar_seed = 1 + + # get a scalar seed value or a vector of seeds + if self.is_integer(seed): + if self.is_uint64_range(seed): + scalar_seed = seed + else: + raise ValueError("Seed must be between 0 and 2**64 - 1") + else: + raise TypeError("Seed must be an integer") + + MCG59_InitScalarSeed(&self.mcg59, self.q_ref, scalar_seed) + self.set_engine( &self.mcg59) + + def __dealloc__(self): + MCG59_Delete(&self.mcg59) + + cdef bint is_uint64_range(self, value): + return value >= 0 and value <= numpy.iinfo(numpy.uint64).max + + cpdef utils.dpnp_descriptor dpnp_rng_beta(double a, double b, size): """ Returns an array populated with samples from beta distribution. diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py index c224553b0cff..462f4538dbdd 100644 --- a/dpnp/random/dpnp_random_state.py +++ b/dpnp/random/dpnp_random_state.py @@ -44,7 +44,10 @@ map_dtype_to_device, use_origin_backend ) -from dpnp.random.dpnp_algo_random import MT19937 +from dpnp.random.dpnp_algo_random import ( + MCG59, + MT19937 +) __all__ = [ @@ -76,19 +79,32 @@ class RandomState: """ def __init__(self, seed=None, device=None, sycl_queue=None): + self._sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue) + self._sycl_device = self._sycl_queue.sycl_device + + is_cpu = self._sycl_device.is_cpu if seed is None: - # ask NumPy to generate an array of three random integers as default seed value - self._seed = numpy.random.randint(low=0, high=numpy.iinfo(numpy.int32).max + 1, size=3) + low = 0 + high = numpy.iinfo(numpy.int32).max + 1 + + if is_cpu: + # ask NumPy to generate an array of three random integers as default seed value + self._seed = numpy.random.randint(low=low, high=high, size=3) + else: + # ask NumPy to generate a random 32-bit integer as default seed value + self._seed = numpy.random.randint(low=low, high=high, size=1)[0] else: self._seed = seed - self._sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue) - self._sycl_device = self._sycl_queue.sycl_device - # 'float32' is default floating data type if device doesn't support 'float64' self._def_float_type = map_dtype_to_device(dpnp.float64, self._sycl_device) - self._random_state = MT19937(self._seed, self._sycl_queue) + # TODO: rework through pybind11 extension for MKL engine and distribution classes + if is_cpu: + self._random_state = MT19937(self._seed, self._sycl_queue) + else: + # MCG59 is assumed to provide a better performance on GPU than MT19937 + self._random_state = MCG59(self._seed, self._sycl_queue) self._fallback_random_state = call_origin(numpy.random.RandomState, seed, allow_fallback=True) diff --git a/dpnp/version.py b/dpnp/version.py index 8fa967380a84..de9feba31e33 100644 --- a/dpnp/version.py +++ b/dpnp/version.py @@ -29,6 +29,6 @@ DPNP version module """ -__version__: str = '0.11.2dev1' +__version__: str = '0.12.0' version: str = __version__ diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index d4d77828b61a..d0fb01ec90a0 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -232,8 +232,6 @@ tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAn tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_strides_swapped tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_type_c_contiguous_no_copy 
tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_type_f_contiguous_no_copy -tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_transposed_fill -tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_transposed_flatten tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_flatten tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_flatten_copied tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_isinstance_numpy_copy @@ -682,55 +680,9 @@ tests/third_party/cupy/manipulation_tests/test_shape.py::TestRavel::test_ravel2 tests/third_party/cupy/manipulation_tests/test_shape.py::TestRavel::test_ravel3 tests/third_party/cupy/manipulation_tests/test_shape.py::TestRavel::test_external_ravel tests/third_party/cupy/manipulation_tests/test_shape.py::TestRavel::test_ravel -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_0_{order_init='C', order_reshape='C', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_10_{order_init='C', order_reshape='c', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_11_{order_init='C', order_reshape='c', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_12_{order_init='C', order_reshape='f', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_13_{order_init='C', order_reshape='f', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_14_{order_init='C', order_reshape='f', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_15_{order_init='C', order_reshape='a', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_16_{order_init='C', order_reshape='a', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_17_{order_init='C', order_reshape='a', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_18_{order_init='F', order_reshape='C', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_19_{order_init='F', order_reshape='C', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_1_{order_init='C', order_reshape='C', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_20_{order_init='F', order_reshape='C', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_21_{order_init='F', order_reshape='F', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity 
-tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_22_{order_init='F', order_reshape='F', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_23_{order_init='F', order_reshape='F', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_24_{order_init='F', order_reshape='A', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_25_{order_init='F', order_reshape='A', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_26_{order_init='F', order_reshape='A', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_27_{order_init='F', order_reshape='c', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_28_{order_init='F', order_reshape='c', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_29_{order_init='F', order_reshape='c', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_2_{order_init='C', order_reshape='C', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_30_{order_init='F', order_reshape='f', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_31_{order_init='F', order_reshape='f', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_32_{order_init='F', order_reshape='f', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_33_{order_init='F', order_reshape='a', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_34_{order_init='F', order_reshape='a', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_35_{order_init='F', order_reshape='a', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_3_{order_init='C', order_reshape='F', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_4_{order_init='C', order_reshape='F', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_5_{order_init='C', order_reshape='F', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_6_{order_init='C', order_reshape='A', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_7_{order_init='C', order_reshape='A', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity 
-tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_8_{order_init='C', order_reshape='A', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_9_{order_init='C', order_reshape='c', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_external_reshape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_nocopy_reshape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_nocopy_reshape_with_order -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape2 -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape_strides -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape_with_unknown_dimension -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_transposed_reshape2 -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_0_{shape=(2, 3)}::test_shape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_0_{shape=(2, 3)}::test_shape_list -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_1_{shape=()}::test_shape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_1_{shape=()}::test_shape_list -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_2_{shape=(4,)}::test_shape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_2_{shape=(4,)}::test_shape_list +tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape_zerosize +tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape_zerosize2 + tests/third_party/cupy/manipulation_tests/test_tiling.py::TestRepeatRepeatsNdarray::test_func tests/third_party/cupy/manipulation_tests/test_tiling.py::TestRepeatRepeatsNdarray::test_method tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTileFailure_param_0_{reps=-1}::test_tile_failure @@ -831,23 +783,6 @@ tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint_negative tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_round_ tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_trunc -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_keepdims -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_transposed -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_transposed2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes3 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes4 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis_huge -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis_transposed 
-tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis_transposed2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_dtype -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_keepdims_and_dtype -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_keepdims_multiple_axes tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_out tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_out_wrong_shape tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_ndarray_cumprod_2dim_with_axis @@ -882,8 +817,6 @@ tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodLong_param_1 tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodLong_param_15_{axis=0, func='nanprod', keepdims=False, shape=(20, 30, 40), transpose_axes=False}::test_nansum_axis_transposed tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodLong_param_9_{axis=0, func='nanprod', keepdims=True, shape=(2, 3, 4), transpose_axes=False}::test_nansum_all tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodLong_param_9_{axis=0, func='nanprod', keepdims=True, shape=(2, 3, 4), transpose_axes=False}::test_nansum_axis_transposed -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_transposed2 tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim_with_discont tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_2dim_with_axis @@ -1056,6 +989,7 @@ tests/third_party/cupy/random_tests/test_sample.py::TestMultinomial_param_1_{siz tests/third_party/cupy/random_tests/test_sample.py::TestMultinomial_param_2_{size=4}::test_multinomial tests/third_party/cupy/random_tests/test_sample.py::TestMultinomial_param_3_{size=(0,)}::test_multinomial tests/third_party/cupy/random_tests/test_sample.py::TestMultinomial_param_4_{size=(1, 0)}::test_multinomial +tests/third_party/cupy/random_tests/test_sample.py::TestRandint2::test_bound_float1 tests/third_party/cupy/random_tests/test_sample.py::TestRandint2::test_goodness_of_fit tests/third_party/cupy/random_tests/test_sample.py::TestRandint2::test_goodness_of_fit_2 @@ -1304,7 +1238,6 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_h tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_same_value tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_weights_mismatch -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestMeanVar::test_external_mean_axis tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_all_nan tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_float16 tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_huge diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index f18d39cd9f48..727d9650bfb9 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -86,13 +86,10 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_all 
tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_axis -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_sum_all -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_sum_axis tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_prod_all tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_prod_axis -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_keepdims +tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_out +tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_out_wrong_shape tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_1dim tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_2dim_without_axis tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_0_{axis=0}::test_cumsum @@ -142,7 +139,6 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPois tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([[i, i] for i in x])] -tests/test_arraymanipulation.py::TestConcatenate::test_concatenate tests/test_histograms.py::TestHistogram::test_density tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.astype(dpnp.asarray(x), dpnp.int8)] @@ -214,7 +210,6 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumError::test_too_ma tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_dim_mismatch3 tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_too_many_dims3 -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_outer tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot tests/third_party/cupy/manipulation_tests/test_basic.py::TestCopytoFromScalar_param_7_{dst_shape=(0,), src=3.2}::test_copyto_where tests/third_party/cupy/manipulation_tests/test_basic.py::TestCopytoFromScalar_param_8_{dst_shape=(0,), src=0}::test_copyto_where @@ -244,7 +239,7 @@ tests/third_party/cupy/manipulation_tests/test_basic.py::TestCopytoFromScalar_pa tests/third_party/cupy/manipulation_tests/test_basic.py::TestCopytoFromScalar_param_32_{dst_shape=(2, 2), src=True}::test_copyto_where tests/third_party/cupy/manipulation_tests/test_basic.py::TestCopytoFromScalar_param_33_{dst_shape=(2, 2), src=False}::test_copyto_where tests/third_party/cupy/manipulation_tests/test_basic.py::TestCopytoFromScalar_param_34_{dst_shape=(2, 2), src=(1+1j)}::test_copyto_where -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape_zerosize + tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_out_noncontiguous tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_0_{axis=0}::test_cumsum_axis_out_noncontiguous tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_0_{axis=0}::test_cumsum_out_noncontiguous @@ -257,9 +252,6 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMult tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_2_{d=4, shape=(4, 3, 2)}::test_normal tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_3_{d=4, 
shape=(3, 2)}::test_normal -tests/third_party/cupy/statistics_tests/test_correlation.py::TestCov::test_cov_empty -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestMeanVar::test_external_mean_axis - tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory tests/test_arraymanipulation.py::TestHstack::test_generator tests/test_arraymanipulation.py::TestVstack::test_generator @@ -356,8 +348,6 @@ tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAn tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_strides_swapped tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_type_c_contiguous_no_copy tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_type_f_contiguous_no_copy -tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_transposed_fill -tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_transposed_flatten tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_flatten_copied tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_flatten tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_isinstance_numpy_copy @@ -763,15 +753,11 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1 -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_dot_vec2 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_vdot -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_higher_order_inner tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes @@ -825,55 +811,9 @@ tests/third_party/cupy/manipulation_tests/test_shape.py::TestRavel::test_ravel2 tests/third_party/cupy/manipulation_tests/test_shape.py::TestRavel::test_ravel3 tests/third_party/cupy/manipulation_tests/test_shape.py::TestRavel::test_external_ravel tests/third_party/cupy/manipulation_tests/test_shape.py::TestRavel::test_ravel -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_0_{order_init='C', order_reshape='C', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity 
-tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_10_{order_init='C', order_reshape='c', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_11_{order_init='C', order_reshape='c', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_12_{order_init='C', order_reshape='f', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_13_{order_init='C', order_reshape='f', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_14_{order_init='C', order_reshape='f', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_15_{order_init='C', order_reshape='a', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_16_{order_init='C', order_reshape='a', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_17_{order_init='C', order_reshape='a', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_18_{order_init='F', order_reshape='C', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_19_{order_init='F', order_reshape='C', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_1_{order_init='C', order_reshape='C', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_20_{order_init='F', order_reshape='C', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_21_{order_init='F', order_reshape='F', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_22_{order_init='F', order_reshape='F', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_23_{order_init='F', order_reshape='F', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_24_{order_init='F', order_reshape='A', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_25_{order_init='F', order_reshape='A', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_26_{order_init='F', order_reshape='A', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_27_{order_init='F', order_reshape='c', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_28_{order_init='F', order_reshape='c', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity 
-tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_29_{order_init='F', order_reshape='c', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_2_{order_init='C', order_reshape='C', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_30_{order_init='F', order_reshape='f', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_31_{order_init='F', order_reshape='f', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_32_{order_init='F', order_reshape='f', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_33_{order_init='F', order_reshape='a', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_34_{order_init='F', order_reshape='a', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_35_{order_init='F', order_reshape='a', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_3_{order_init='C', order_reshape='F', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_4_{order_init='C', order_reshape='F', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_5_{order_init='C', order_reshape='F', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_6_{order_init='C', order_reshape='A', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_7_{order_init='C', order_reshape='A', shape_in_out=((6,), (2, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_8_{order_init='C', order_reshape='A', shape_in_out=((3, 3, 3), (9, 3))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshapeOrder_param_9_{order_init='C', order_reshape='c', shape_in_out=((2, 3), (1, 6, 1))}::test_reshape_contiguity -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_external_reshape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_nocopy_reshape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_nocopy_reshape_with_order -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape2 -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape_strides -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape_with_unknown_dimension -tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_transposed_reshape2 -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_0_{shape=(2, 3)}::test_shape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_0_{shape=(2, 3)}::test_shape_list 
-tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_1_{shape=()}::test_shape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_1_{shape=()}::test_shape_list -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_2_{shape=(4,)}::test_shape -tests/third_party/cupy/manipulation_tests/test_shape.py::TestShape_param_2_{shape=(4,)}::test_shape_list +tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape_zerosize +tests/third_party/cupy/manipulation_tests/test_shape.py::TestReshape::test_reshape_zerosize2 + tests/third_party/cupy/manipulation_tests/test_tiling.py::TestRepeatRepeatsNdarray::test_func tests/third_party/cupy/manipulation_tests/test_tiling.py::TestRepeatRepeatsNdarray::test_method tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTileFailure_param_0_{reps=-1}::test_tile_failure @@ -974,22 +914,6 @@ tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint_negative tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_round_ tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_trunc -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_transposed -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_transposed2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes3 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes4 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis_huge -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis_transposed -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axis_transposed2 -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_dtype -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_keepdims_and_dtype -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_keepdims_multiple_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_out -tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_out_wrong_shape tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_ndarray_cumprod_2dim_with_axis tests/third_party/cupy/math_tests/test_sumprod.py::TestDiff::test_diff_1dim tests/third_party/cupy/math_tests/test_sumprod.py::TestDiff::test_diff_1dim_with_n @@ -1151,6 +1075,7 @@ tests/third_party/cupy/random_tests/test_sample.py::TestMultinomial_param_1_{siz tests/third_party/cupy/random_tests/test_sample.py::TestMultinomial_param_2_{size=4}::test_multinomial tests/third_party/cupy/random_tests/test_sample.py::TestMultinomial_param_3_{size=(0,)}::test_multinomial tests/third_party/cupy/random_tests/test_sample.py::TestMultinomial_param_4_{size=(1, 0)}::test_multinomial +tests/third_party/cupy/random_tests/test_sample.py::TestRandint2::test_bound_float1 tests/third_party/cupy/random_tests/test_sample.py::TestRandint2::test_goodness_of_fit tests/third_party/cupy/random_tests/test_sample.py::TestRandint2::test_goodness_of_fit_2 
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bound_1 @@ -1374,7 +1299,7 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_h tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_array_bins tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_bins_not_ordered tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins +tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_density tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_empty tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights diff --git a/tests/test_manipulation.py b/tests/test_manipulation.py index bb91f5d0d500..b8ee2cfaa971 100644 --- a/tests/test_manipulation.py +++ b/tests/test_manipulation.py @@ -1,5 +1,10 @@ import pytest + import numpy +from numpy.testing import ( + assert_array_equal +) + import dpnp @@ -20,7 +25,7 @@ def test_copyto_dtype(in_obj, out_dtype): result = dpnp.empty(dparr.size, dtype=out_dtype) dpnp.copyto(result, dparr) - numpy.testing.assert_array_equal(result, expected) + assert_array_equal(result, expected) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -32,7 +37,26 @@ def test_repeat(arr): dpnp_a = dpnp.array(arr) expected = numpy.repeat(a, 2) result = dpnp.repeat(dpnp_a, 2) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) + + +def test_result_type(): + X = [dpnp.ones((2), dtype=dpnp.int64), dpnp.int32, "float16"] + X_np = [numpy.ones((2), dtype=numpy.int64), numpy.int32, "float16"] + + assert dpnp.result_type(*X) == numpy.result_type(*X_np) + +def test_result_type_only_dtypes(): + X = [dpnp.int64, dpnp.int32, dpnp.bool, dpnp.float32] + X_np = [numpy.int64, numpy.int32, numpy.bool_, numpy.float32] + + assert dpnp.result_type(*X) == numpy.result_type(*X_np) + +def test_result_type_only_arrays(): + X = [dpnp.ones((2), dtype=dpnp.int64), dpnp.ones((7, 4), dtype=dpnp.int32)] + X_np = [numpy.ones((2), dtype=numpy.int64), numpy.ones((7, 4), dtype=numpy.int32)] + + assert dpnp.result_type(*X) == numpy.result_type(*X_np) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -51,4 +75,32 @@ def test_unique(array): expected = numpy.unique(np_a) result = dpnp.unique(dpnp_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) + + +class TestTranspose: + @pytest.mark.parametrize("axes", [(0, 1), (1, 0)]) + def test_2d_with_axes(self, axes): + na = numpy.array([[1, 2], [3, 4]]) + da = dpnp.array(na) + + expected = numpy.transpose(na, axes) + result = dpnp.transpose(da, axes) + assert_array_equal(expected, result) + + @pytest.mark.parametrize("axes", [(1, 0, 2), ((1, 0, 2),)]) + def test_3d_with_packed_axes(self, axes): + na = numpy.ones((1, 2, 3)) + da = dpnp.array(na) + + expected = na.transpose(*axes) + result = da.transpose(*axes) + assert_array_equal(expected, result) + + @pytest.mark.parametrize("shape", [(10,), (2, 4), (5, 3, 7), (3, 8, 4, 1)]) + def test_none_axes(self, shape): + na = numpy.ones(shape) + da = dpnp.ones(shape) + + assert_array_equal(na.transpose(), da.transpose()) + 
assert_array_equal(na.transpose(None), da.transpose(None)) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 6224eb57ab81..bdd4257e646b 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -387,7 +387,7 @@ def test_ediff1d_int(self, array, data_type): expected = numpy.ediff1d(np_a) assert_array_equal(expected, result) - + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_ediff1d_args(self): np_a = numpy.array([1, 2, 4, 7, 0]) @@ -667,25 +667,28 @@ def test_out_dtypes(self, dtype): dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) - result = dpnp.add(dp_array1, dp_array2, out=dp_out) + if dtype != dpnp.complex64: + # dtype of out mismatches types of input arrays + with pytest.raises(TypeError): + dpnp.add(dp_array1, dp_array2, out=dp_out) + + # allocate new out with expected type + dp_out = dpnp.empty(size, dtype=dtype) + result = dpnp.add(dp_array1, dp_array2, out=dp_out) assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) def test_out_overlap(self, dtype): size = 1 if dtype == dpnp.bool else 15 - - np_a = numpy.arange(2 * size, dtype=dtype) - expected = numpy.add(np_a[size::], np_a[::2], out=np_a[:size:]) - dp_a = dpnp.arange(2 * size, dtype=dtype) - result = dpnp.add(dp_a[size::], dp_a[::2], out=dp_a[:size:]) - - assert_allclose(expected, result) - assert_allclose(dp_a, np_a) + with pytest.raises(TypeError): + dpnp.add(dp_a[size::], dp_a[::2], out=dp_a[:size:]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True)) + @pytest.mark.skip("muted until in-place support in dpctl is done") def test_inplace_strided_out(self, dtype): size = 21 @@ -705,7 +708,7 @@ def test_invalid_shape(self, shape): dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) dp_out = dpnp.empty(shape, dtype=dpnp.float64) - with pytest.raises(ValueError): + with pytest.raises(TypeError): dpnp.add(dp_array1, dp_array2, out=dp_out) @pytest.mark.parametrize("out", @@ -750,25 +753,28 @@ def test_out_dtypes(self, dtype): dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) - result = dpnp.multiply(dp_array1, dp_array2, out=dp_out) + if dtype != dpnp.complex64: + # dtype of out mismatches types of input arrays + with pytest.raises(TypeError): + dpnp.multiply(dp_array1, dp_array2, out=dp_out) + # allocate new out with expected type + dp_out = dpnp.empty(size, dtype=dtype) + + result = dpnp.multiply(dp_array1, dp_array2, out=dp_out) assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) def test_out_overlap(self, dtype): size = 1 if dtype == dpnp.bool else 15 - - np_a = numpy.arange(2 * size, dtype=dtype) - expected = numpy.multiply(np_a[size::], np_a[::2], out=np_a[:size:]) - dp_a = dpnp.arange(2 * size, dtype=dtype) - result = dpnp.multiply(dp_a[size::], dp_a[::2], out=dp_a[:size:]) - - assert_allclose(expected, result) - assert_allclose(dp_a, np_a) + with pytest.raises(TypeError): + dpnp.multiply(dp_a[size::], dp_a[::2], out=dp_a[:size:]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True)) + @pytest.mark.skip("muted until in-place support in dpctl is done") def test_inplace_strided_out(self, dtype): size = 21 @@ -788,7 +794,7 @@ def test_invalid_shape(self, shape): dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) dp_out = dpnp.empty(shape,
dtype=dpnp.float64) - with pytest.raises(ValueError): + with pytest.raises(TypeError): dpnp.multiply(dp_array1, dp_array2, out=dp_out) @pytest.mark.parametrize("out", @@ -934,6 +940,7 @@ def test_sum_empty(dtype, axis): assert_array_equal(numpy_res, dpnp_res.asnumpy()) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_bool=True)) def test_sum_empty_out(dtype): a = dpnp.empty((1, 2, 0, 4), dtype=dtype) @@ -955,3 +962,44 @@ def test_sum(shape, dtype_in, dtype_out): numpy_res = a_np.sum(axis=axis, dtype=dtype_out) dpnp_res = a.sum(axis=axis, dtype=dtype_out) assert_array_equal(numpy_res, dpnp_res.asnumpy()) + +class TestMean: + + @pytest.mark.parametrize("dtype", get_all_dtypes()) + def test_mean_axis_tuple(self, dtype): + dp_array = dpnp.array([[0,1,2],[3,4,0]], dtype=dtype) + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.mean(dp_array, axis=(0,1)) + expected = numpy.mean(np_array, axis=(0,1)) + assert_allclose(expected, result) + + + def test_mean_axis_zero_size(self): + dp_array = dpnp.array([], dtype='int64') + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.mean(dp_array) + expected = numpy.mean(np_array) + assert_allclose(expected, result) + + + def test_mean_strided(self): + dp_array = dpnp.array([-2,-1,0,1,0,2], dtype='f4') + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.mean(dp_array[::-1]) + expected = numpy.mean(np_array[::-1]) + assert_allclose(expected, result) + + result = dpnp.mean(dp_array[::2]) + expected = numpy.mean(np_array[::2]) + assert_allclose(expected, result) + + def test_mean_scalar(self): + dp_array = dpnp.array(5) + np_array = dpnp.asnumpy(dp_array) + + result = dp_array.mean() + expected = np_array.mean() + assert_allclose(expected, result) diff --git a/tests/test_outer.py b/tests/test_outer.py index 6c91fad45df4..3ac751b2b280 100644 --- a/tests/test_outer.py +++ b/tests/test_outer.py @@ -3,6 +3,7 @@ import dpnp as dp import numpy as np +import pytest from numpy.testing import assert_raises @@ -40,23 +41,21 @@ def test_the_same_matrix(self, xp, dtype): class TestScalarOuter(unittest.TestCase): - @unittest.skip("A scalar isn't currently supported as input") @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=False) def test_first_is_scalar(self, xp, dtype): scalar = xp.int64(4) a = xp.arange(5**3, dtype=dtype).reshape(5, 5, 5) return xp.outer(scalar, a) - @unittest.skip("A scalar isn't currently supported as input") @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=False) def test_second_is_scalar(self, xp, dtype): scalar = xp.int32(7) a = xp.arange(5**3, dtype=dtype).reshape(5, 5, 5) return xp.outer(a, scalar) - @unittest.skip("A scalar isn't currently supported as input") + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_both_inputs_as_scalar(self, xp): a = xp.int64(4) diff --git a/tests/test_random_state.py b/tests/test_random_state.py index e65e26b4a8ea..ad6483c07219 100644 --- a/tests/test_random_state.py +++ b/tests/test_random_state.py @@ -1,4 +1,7 @@ import pytest +from .helper import ( + is_cpu_device +) from unittest import mock import dpnp @@ -57,10 +60,19 @@ def test_distr(self, dtype, usm_type): # default dtype depends on fp64 support by the device dtype = get_default_floating() if dtype is None else dtype - expected = numpy.array([[0.428205496031286, -0.55383273779227 ], - [2.027017795643378, 4.318888073163015], - 
[2.69080893259102, -1.047967253719708]], dtype=dtype) - + if sycl_queue.sycl_device.is_cpu: + expected = numpy.array([[0.428205496031286, -0.55383273779227 ], + [2.027017795643378, 4.318888073163015], + [2.69080893259102, -1.047967253719708]], dtype=dtype) + else: + if dtype == dpnp.float64: + expected = numpy.array([[-15.73532523, -11.84163022], + [ 0.548032, 1.41296207], + [-2.63250381, 2.77542322]], dtype=dtype) + else: + expected = numpy.array([[-15.735329, -11.841626], + [ 0.548032, 1.4129621], + [-2.6325033, 2.7754242]], dtype=dtype) # TODO: discuss with opneMKL: there is a difference between CPU and GPU # generated samples since 9 digit while precision=15 for float64 # precision = numpy.finfo(dtype=dtype).precision @@ -200,11 +212,16 @@ def test_distr(self, usm_type): dtype = get_default_floating() data = RandomState(seed, sycl_queue=sycl_queue).rand(3, 2, usm_type=usm_type) - expected = numpy.array([[0.7592552667483687, 0.5937560645397753], - [0.257010098779574 , 0.749422621447593 ], - [0.6316644293256104, 0.7411410815548152]], dtype=dtype) + if sycl_queue.sycl_device.is_cpu: + expected = numpy.array([[0.7592552667483687, 0.5937560645397753], + [0.257010098779574 , 0.749422621447593 ], + [0.6316644293256104, 0.7411410815548152]], dtype=dtype) + else: + expected = numpy.array([[4.864511571334162e-14, 7.333946068708259e-01], + [8.679067575689537e-01, 5.627257087965376e-01], + [4.413379518222594e-01, 4.334482843514076e-01]], dtype=dtype) - precision = numpy.finfo(dtype=numpy.float64).precision + precision = numpy.finfo(dtype=dtype).precision assert_array_almost_equal(data.asnumpy(), expected, decimal=precision) assert_cfd(data, sycl_queue, usm_type) @@ -276,9 +293,14 @@ def test_distr(self, dtype, usm_type): size=(3, 2), dtype=dtype, usm_type=usm_type) - expected = numpy.array([[4, 1], - [5, 3], - [5, 7]], dtype=numpy.int32) + if sycl_queue.sycl_device.is_cpu: + expected = numpy.array([[4, 1], + [5, 3], + [5, 7]], dtype=numpy.int32) + else: + expected = numpy.array([[1, 2], + [1, 5], + [3, 7]], dtype=numpy.int32) assert_array_equal(data.asnumpy(), expected) assert_cfd(data, sycl_queue, usm_type) @@ -310,16 +332,23 @@ def test_distr(self, dtype, usm_type): def test_float_bounds(self): - actual = RandomState(365852).randint(low=0.6, high=6.789102534, size=(7,)).asnumpy() - expected = numpy.array([4, 4, 3, 3, 1, 0, 3], dtype=numpy.int32) - assert_array_equal(actual, expected) + actual = RandomState(365852).randint(low=0.6, high=6.789102534, size=(7,)) + if actual.sycl_device.is_cpu: + expected = numpy.array([4, 4, 3, 3, 1, 0, 3], dtype=numpy.int32) + else: + expected = numpy.array([0, 1, 4, 0, 3, 3, 3], dtype=numpy.int32) + assert_array_equal(actual.asnumpy(), expected) def test_negative_bounds(self): - actual = RandomState(5143).randint(low=-15.74, high=-3, size=(2, 7)).asnumpy() - expected = numpy.array([[-9, -12, -4, -12, -5, -13, -9], - [-4, -6, -13, -9, -9, -6, -15]], dtype=numpy.int32) - assert_array_equal(actual, expected) + actual = RandomState(5143).randint(low=-15.74, high=-3, size=(2, 7)) + if actual.sycl_device.is_cpu: + expected = numpy.array([[-9, -12, -4, -12, -5, -13, -9], + [-4, -6, -13, -9, -9, -6, -15]], dtype=numpy.int32) + else: + expected = numpy.array([[-15, -7, -12, -5, -10, -11, -11], + [-14, -7, -7, -10, -14, -9, -6]], dtype=numpy.int32) + assert_array_equal(actual.asnumpy(), expected) def test_negative_interval(self): @@ -459,9 +488,14 @@ def test_distr(self, usm_type): dtype = get_default_floating() data = RandomState(seed, sycl_queue=sycl_queue).randn(3, 2, 
usm_type=usm_type) - expected = numpy.array([[-0.862485623762009, 1.169492612490272], - [-0.405876118480338, 0.939006537666719], - [-0.615075625641019, 0.555260469834381]], dtype=dtype) + if sycl_queue.sycl_device.is_cpu: + expected = numpy.array([[-0.862485623762009, 1.169492612490272], + [-0.405876118480338, 0.939006537666719], + [-0.615075625641019, 0.555260469834381]], dtype=dtype) + else: + expected = numpy.array([[-4.019566117504177, 7.016412093100934], + [-1.044015254820266, -0.839721616192757], + [ 0.545079768980527, 0.380676324099473]], dtype=dtype) # TODO: discuss with opneMKL: there is a difference between CPU and GPU # generated samples since 9 digit while precision=15 for float64 @@ -543,6 +577,9 @@ def test_scalar(self, func): 'dpnp.arange(2)', '[0]', '[4294967295]', '[2, 7, 15]', '(1,)', '(85, 6, 17)']) def test_array_range(self, seed): + if not is_cpu_device(): + pytest.skip("seed as a scalar is only supported on GPU") + size = 15 a1 = RandomState(seed).uniform(size=size).asnumpy() a2 = RandomState(seed).uniform(size=size).asnumpy() @@ -580,9 +617,16 @@ def test_invalid_type(self, seed): 'numpy.iinfo(numpy.uint32).max + 1', '(1, 7, numpy.iinfo(numpy.uint32).max + 1)']) def test_invalid_value(self, seed): - # seed must be an unsigned 32-bit integer - assert_raises(ValueError, RandomState, seed) - + if is_cpu_device(): + # seed must be an unsigned 32-bit integer + assert_raises(ValueError, RandomState, seed) + else: + if dpnp.isscalar(seed): + # seed must be an unsigned 64-bit integer + assert_raises(ValueError, RandomState, seed) + else: + # seed must be a scalar + assert_raises(TypeError, RandomState, seed) @pytest.mark.parametrize("seed", [[], (), @@ -596,8 +640,16 @@ def test_invalid_value(self, seed): 'numpy.array([], dtype=numpy.int64)', 'dpnp.array([], dtype=numpy.int64)']) def test_invalid_shape(self, seed): - # seed must be an unsigned or 1-D array - assert_raises(ValueError, RandomState, seed) + if is_cpu_device(): + # seed must be an unsigned or 1-D array + assert_raises(ValueError, RandomState, seed) + else: + if dpnp.isscalar(seed): + # seed must be an unsigned 64-bit scalar + assert_raises(ValueError, RandomState, seed) + else: + # seed must be a scalar + assert_raises(TypeError, RandomState, seed) class TestStandardNormal: @@ -610,10 +662,16 @@ def test_distr(self, usm_type): dtype = get_default_floating() data = RandomState(seed, sycl_queue=sycl_queue).standard_normal(size=(4, 2), usm_type=usm_type) - expected = numpy.array([[0.112455902594571, -0.249919829443642], - [0.702423540827815, 1.548132130318456], - [0.947364919775284, -0.432257289195464], - [0.736848611436872, 1.557284323302839]], dtype=dtype) + if sycl_queue.sycl_device.is_cpu: + expected = numpy.array([[0.112455902594571, -0.249919829443642], + [0.702423540827815, 1.548132130318456], + [0.947364919775284, -0.432257289195464], + [0.736848611436872, 1.557284323302839]], dtype=dtype) + else: + expected = numpy.array([[-5.851946579836138, -4.415158753007455], + [ 0.156672323326223, 0.475834711471613], + [-1.016957125278234, 0.978587902851975], + [-0.295425067084912, 1.438622345507964]], dtype=dtype) # TODO: discuss with opneMKL: there is a difference between CPU and GPU # generated samples since 9 digit while precision=15 for float64 @@ -670,11 +728,18 @@ def test_distr(self, usm_type): dtype = get_default_floating() data = RandomState(seed, sycl_queue=sycl_queue).random_sample(size=(4, 2), usm_type=usm_type) - expected = numpy.array([[0.1887628440745175, 0.2763057765550911], - [0.3973943444434553, 
0.2975987731479108], - [0.4144027342554182, 0.2636592474300414], - [0.6129623607266694, 0.2596735346596688]], dtype=dtype) - + if sycl_queue.sycl_device.is_cpu: + expected = numpy.array([[0.1887628440745175, 0.2763057765550911], + [0.3973943444434553, 0.2975987731479108], + [0.4144027342554182, 0.2636592474300414], + [0.6129623607266694, 0.2596735346596688]], dtype=dtype) + else: + expected = numpy.array([[0.219563950354e-13, 0.6500454867400344], + [0.8847833902913576, 0.9030532521302965], + [0.2943803743033427, 0.2688879158061396], + [0.2730219631925900, 0.8695396883048091]], dtype=dtype) + + precision = numpy.finfo(dtype=dtype).precision assert_array_almost_equal(data.asnumpy(), expected, decimal=precision) @@ -746,16 +811,28 @@ def test_distr(self, bounds, dtype, usm_type): # default dtype depends on fp64 support by the device dtype = get_default_floating() if dtype is None else dtype - if dtype != dpnp.int32: - expected = numpy.array([[4.023770128630567, 8.87456423597643 ], - [2.888630247435067, 4.823004481580574], - [2.030351535445079, 4.533497077834326]]) - assert_array_almost_equal(actual, expected, decimal=numpy.finfo(dtype=dtype).precision) + if sycl_queue.sycl_device.is_cpu: + if dtype != dpnp.int32: + expected = numpy.array([[4.023770128630567, 8.87456423597643 ], + [2.888630247435067, 4.823004481580574], + [2.030351535445079, 4.533497077834326]]) + assert_array_almost_equal(actual, expected, decimal=numpy.finfo(dtype=dtype).precision) + else: + expected = numpy.array([[3, 8], + [2, 4], + [1, 4]]) + assert_array_equal(actual, expected) else: - expected = numpy.array([[3, 8], - [2, 4], - [1, 4]]) - assert_array_equal(actual, expected) + if dtype != dpnp.int32: + expected = numpy.array([[1.230000000452886, 4.889115418092382], + [6.084098950993071, 1.682066500463302], + [3.316473517549554, 8.428297791221597]]) + assert_array_almost_equal(actual, expected, decimal=numpy.finfo(dtype=dtype).precision) + else: + expected = numpy.array([[1, 4], + [5, 1], + [3, 7]]) + assert_array_equal(actual, expected) # check if compute follows data isn't broken assert_cfd(dpnp_data, sycl_queue, usm_type) diff --git a/tests/test_strides.py b/tests/test_strides.py index 84449db23d61..83396e019cbd 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -1,6 +1,8 @@ import math import pytest -from .helper import get_all_dtypes, is_cpu_device +from .helper import ( + get_all_dtypes +) import dpnp @@ -117,7 +119,7 @@ def test_strides_tan(dtype, shape): @pytest.mark.parametrize("func_name", - ["add", "arctan2", "hypot", "maximum", "minimum", "multiply", "power", "subtract"]) + ["add", "arctan2", "divide", "hypot", "maximum", "minimum", "multiply", "power", "subtract"]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], @@ -214,14 +216,14 @@ def test_strides_true_devide(dtype, shape): @pytest.mark.parametrize("func_name", - ["add", "multiply", "power"]) + ["add", "multiply", "power", "subtract"]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_strided_out_2args(func_name, dtype): - np_out = numpy.ones((5, 3, 2))[::3] + np_out = numpy.ones((5, 3, 2), dtype=dtype)[::3] np_a = numpy.arange(numpy.prod(np_out.shape), dtype=dtype).reshape(np_out.shape) np_b = numpy.full(np_out.shape, fill_value=0.7, dtype=dtype) - dp_out = dpnp.ones((5, 3, 2))[::3] + dp_out = dpnp.ones((5, 3, 2), dtype=dtype)[::3] dp_a = dpnp.array(np_a) dp_b = dpnp.array(np_b) @@ -233,7 +235,7 @@ def 
test_strided_out_2args(func_name, dtype): @pytest.mark.parametrize("func_name", - ["add", "multiply", "power"]) + ["add", "multiply", "power", "subtract"]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_strided_in_out_2args(func_name, dtype): sh = (3, 4, 2) @@ -255,8 +257,9 @@ def test_strided_in_out_2args(func_name, dtype): @pytest.mark.parametrize("func_name", - ["add", "multiply", "power"]) + ["add", "multiply", "power", "subtract"]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +@pytest.mark.skip("dpctl doesn't support type mismatch of out array") def test_strided_in_out_2args_diff_out_dtype(func_name, dtype): sh = (3, 3, 2) prod = numpy.prod(sh) @@ -277,8 +280,9 @@ def test_strided_in_out_2args_diff_out_dtype(func_name, dtype): @pytest.mark.parametrize("func_name", - ["add", "multiply", "power"]) + ["add", "multiply", "power", "subtract"]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +@pytest.mark.skip("dpctl doesn't support overlap of arrays") def test_strided_in_2args_overlap(func_name, dtype): size = 5 @@ -293,8 +297,9 @@ def test_strided_in_2args_overlap(func_name, dtype): @pytest.mark.parametrize("func_name", - ["add", "multiply", "power"]) + ["add", "multiply", "power", "subtract"]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +@pytest.mark.skip("dpctl doesn't support overlap of arrays") def test_strided_in_out_2args_overlap(func_name, dtype): sh = (4, 3, 2) prod = numpy.prod(sh) diff --git a/tests/test_sum.py b/tests/test_sum.py index 21b1a99ffe15..ed382a4bcdd0 100644 --- a/tests/test_sum.py +++ b/tests/test_sum.py @@ -1,15 +1,24 @@ +import pytest + import dpnp +from tests.helper import get_float_dtypes, has_support_aspect64 import numpy - -def test_sum_float64(): - a = numpy.array([[[-2., 3.], [9.1, 0.2]], [[-2., 5.0], [-2, -1.2]], [[1.0, -2.], [5.0, -1.1]]]) +# Note: dpnp.sum() always upcasts integers to (u)int64 and float32 to +# float64 for dtype=None (if fp64 is supported by the device); numpy's own
`np.sum` does that too for integers, but not for +# float32, so we need to special-case it for these tests +@pytest.mark.parametrize("dtype", get_float_dtypes()) +def test_sum_float(dtype): + a = numpy.array([[[-2., 3.], [9.1, 0.2]], [[-2., 5.0], [-2, -1.2]], [[1.0, -2.], [5.0, -1.1]]], dtype=dtype) ia = dpnp.array(a) for axis in range(len(a)): result = dpnp.sum(ia, axis=axis) - expected = numpy.sum(a, axis=axis) + if dtype == dpnp.float32 and has_support_aspect64(): + expected = numpy.sum(a, axis=axis, dtype=numpy.float64) + else: + expected = numpy.sum(a, axis=axis) numpy.testing.assert_array_equal(expected, result) @@ -23,9 +32,12 @@ def test_sum_int(): def test_sum_axis(): - a = numpy.array([[[-2., 3.], [9.1, 0.2]], [[-2., 5.0], [-2, -1.2]], [[1.0, -2.], [5.0, -1.1]]]) + a = numpy.array([[[-2., 3.], [9.1, 0.2]], [[-2., 5.0], [-2, -1.2]], [[1.0, -2.], [5.0, -1.1]]], dtype='f4') ia = dpnp.array(a) result = dpnp.sum(ia, axis=1) - expected = numpy.sum(a, axis=1) + if has_support_aspect64(): + expected = numpy.sum(a, axis=1, dtype=numpy.float64) + else: + expected = numpy.sum(a, axis=1) numpy.testing.assert_array_equal(expected, result) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 3182003a90be..7b72c92600cb 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -529,7 +529,8 @@ def test_random_state(func, args, kwargs, device, usm_type): sycl_queue = dpctl.SyclQueue(device, property="in_order") # test with in-order SYCL queue per a device and passed as argument - rs = dpnp.random.RandomState((147, 56, 896), sycl_queue=sycl_queue) + seed = (147, 56, 896) if device.is_cpu else 987654 + rs = dpnp.random.RandomState(seed, sycl_queue=sycl_queue) res_array = getattr(rs, func)(*args, **kwargs) assert usm_type == res_array.usm_type assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index df8575197b38..61145de42c71 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -18,6 +18,8 @@ def test_coerced_usm_types_sum(usm_type_x, usm_type_y): y = dp.arange(1000, usm_type = usm_type_y) z = 1.3 + x + y + 2 + + # inplace add z += x z += 7.4 @@ -33,6 +35,8 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): y = dp.arange(10, usm_type = usm_type_y) z = 3 * x * y * 1.5 + + # inplace multiply z *= x z *= 4.8 @@ -49,6 +53,10 @@ def test_coerced_usm_types_subtract(usm_type_x, usm_type_y): z = 20 - x - y - 7.4 + # inplace subtract + z -= x + z -= -3.4 + assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) @@ -62,6 +70,10 @@ def test_coerced_usm_types_divide(usm_type_x, usm_type_y): z = 2 / x / y / 1.5 + # inplace divide + z /= x + z /= -2.4 + assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) diff --git a/tests/third_party/cupy/linalg_tests/test_eigenvalue.py b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py index 80ff92a1093f..704bcf3f2e6a 100644 --- a/tests/third_party/cupy/linalg_tests/test_eigenvalue.py +++ b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py @@ -8,8 +8,7 @@ def _get_hermitian(xp, a, UPLO): - # TODO: fix this, currently dpnp.transpose() doesn't support complex types - # and no dpnp_array.swapaxes() + # TODO: remove wrapping, but now there is no dpnp_array.swapaxes() a = _wrap_as_numpy_array(xp, a) _xp = numpy diff --git a/tests/third_party/cupy/manipulation_tests/test_shape.py 
b/tests/third_party/cupy/manipulation_tests/test_shape.py index b80437dba892..826c0e490119 100644 --- a/tests/third_party/cupy/manipulation_tests/test_shape.py +++ b/tests/third_party/cupy/manipulation_tests/test_shape.py @@ -28,20 +28,22 @@ def test_shape_list(self): @testing.gpu class TestReshape(unittest.TestCase): + # order = 'A' is out of support currently + _supported_orders = 'CF' - def test_reshape_strides(self): + def test_reshape_shapes(self): def func(xp): a = testing.shaped_arange((1, 1, 1, 2, 2), xp) - return a.strides - self.assertEqual(func(numpy), func(cupy)) + return a.shape + assert func(numpy) == func(cupy) def test_reshape2(self): def func(xp): a = xp.zeros((8,), dtype=xp.float32) - return a.reshape((1, 1, 1, 4, 1, 2)).strides - self.assertEqual(func(numpy), func(cupy)) + return a.reshape((1, 1, 1, 4, 1, 2)).shape + assert func(numpy) == func(cupy) - @testing.for_orders('CFA') + @testing.for_orders(_supported_orders) @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_nocopy_reshape(self, xp, dtype, order): @@ -50,7 +52,7 @@ def test_nocopy_reshape(self, xp, dtype, order): b[1] = 1 return a - @testing.for_orders('CFA') + @testing.for_orders(_supported_orders) @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_nocopy_reshape_with_order(self, xp, dtype, order): @@ -59,13 +61,13 @@ def test_nocopy_reshape_with_order(self, xp, dtype, order): b[1] = 1 return a - @testing.for_orders('CFA') + @testing.for_orders(_supported_orders) @testing.numpy_cupy_array_equal() def test_transposed_reshape2(self, xp, order): a = testing.shaped_arange((2, 3, 4), xp).transpose(2, 0, 1) return a.reshape(2, 3, 4, order=order) - @testing.for_orders('CFA') + @testing.for_orders(_supported_orders) @testing.numpy_cupy_array_equal() def test_reshape_with_unknown_dimension(self, xp, order): a = testing.shaped_arange((2, 3, 4), xp) @@ -95,17 +97,58 @@ def test_reshape_zerosize_invalid(self): with pytest.raises(ValueError): a.reshape(()) + def test_reshape_zerosize_invalid_unknown(self): + for xp in (numpy, cupy): + a = xp.zeros((0,)) + with pytest.raises(ValueError): + a.reshape((-1, 0)) + @testing.numpy_cupy_array_equal() def test_reshape_zerosize(self, xp): a = xp.zeros((0,)) - return a.reshape((0,)) - - @testing.for_orders('CFA') + b = a.reshape((0,)) + assert b.base is a + return b + + @testing.for_orders(_supported_orders) + @testing.numpy_cupy_array_equal(strides_check=True) + def test_reshape_zerosize2(self, xp, order): + a = xp.zeros((2, 0, 3)) + b = a.reshape((5, 0, 4), order=order) + assert b.base is a + return b + + @testing.for_orders(_supported_orders) @testing.numpy_cupy_array_equal() def test_external_reshape(self, xp, order): a = xp.zeros((8,), dtype=xp.float32) return xp.reshape(a, (1, 1, 1, 4, 1, 2), order=order) + def _test_ndim_limit(self, xp, ndim, dtype, order): + idx = [1]*ndim + idx[-1] = ndim + a = xp.ones(ndim, dtype=dtype) + a = a.reshape(idx, order=order) + assert a.ndim == ndim + return a + + @testing.for_orders(_supported_orders) + @testing.for_all_dtypes() + @testing.numpy_cupy_array_equal() + def test_ndim_limit1(self, xp, dtype, order): + # from cupy/cupy#4193 + a = self._test_ndim_limit(xp, 32, dtype, order) + return a + + @pytest.mark.skip("no max ndim limit for reshape in dpctl") + @testing.for_orders(_supported_orders) + @testing.for_all_dtypes() + def test_ndim_limit2(self, dtype, order): + # from cupy/cupy#4193 + for xp in (numpy, cupy): + with pytest.raises(ValueError): + self._test_ndim_limit(xp, 33, dtype, order) + 
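For readers skimming the `_test_ndim_limit` helper added above: it packs all `ndim` elements into the last axis and pads the remaining axes with ones. An illustrative snippet (plain NumPy, not part of the patch) showing the shape built for `ndim=32`:

```python
import numpy

# Mirror of _test_ndim_limit(xp, 32, dtype, order) using plain NumPy
ndim = 32
idx = [1] * ndim      # one entry per dimension: [1, 1, ..., 1]
idx[-1] = ndim        # last axis holds all elements -> shape (1, ..., 1, 32)

a = numpy.ones(ndim, dtype=numpy.float32).reshape(idx, order="C")
assert a.ndim == ndim and a.shape[-1] == ndim
```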
@testing.gpu class TestRavel(unittest.TestCase): @@ -139,7 +182,9 @@ def test_external_ravel(self, xp): @testing.parameterize(*testing.product({ 'order_init': ['C', 'F'], - 'order_reshape': ['C', 'F', 'A', 'c', 'f', 'a'], + # order = 'A' is out of support currently + # 'order_reshape': ['C', 'F', 'A', 'c', 'f', 'a'], + 'order_reshape': ['C', 'F', 'c', 'f'], 'shape_in_out': [((2, 3), (1, 6, 1)), # (shape_init, shape_final) ((6,), (2, 3)), ((3, 3, 3), (9, 3))], @@ -161,5 +206,4 @@ def test_reshape_contiguity(self): assert b_cupy.flags.f_contiguous == b_numpy.flags.f_contiguous assert b_cupy.flags.c_contiguous == b_numpy.flags.c_contiguous - testing.assert_array_equal(b_cupy.strides, b_numpy.strides) testing.assert_array_equal(b_cupy, b_numpy) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 71f33429c704..c52b2d2df3a5 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -153,7 +153,7 @@ def check_binary(self, xp): is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) - if self.name in ('add', 'multiply', 'power', 'subtract'): + if self.name == 'power': if is_array_arg1 and is_array_arg2: # If both inputs are arrays where one is of floating type and another - integer, # NumPy will return an output array of always "float64" type, diff --git a/tests/third_party/cupy/math_tests/test_sumprod.py b/tests/third_party/cupy/math_tests/test_sumprod.py index d9fe3b22b265..ae5aaed495a3 100644 --- a/tests/third_party/cupy/math_tests/test_sumprod.py +++ b/tests/third_party/cupy/math_tests/test_sumprod.py @@ -4,6 +4,7 @@ import pytest import dpnp as cupy +from tests.helper import has_support_aspect64 from tests.third_party.cupy import testing @@ -16,59 +17,95 @@ def tearDown(self): # cupy.get_default_pinned_memory_pool().free_all_blocks() pass + # Note: dpnp.sum() always upcasts integers to (u)int64 and float32 to + # float64 for dtype=None (if fp64 is supported by the device); numpy's own
`np.sum` does that too for integers, but not for + # float32, so we need to special-case it for these tests @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_sum_all(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return a.sum() + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(dtype=dtype) + else: + return a.sum() @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_sum_all_keepdims(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return a.sum(keepdims=True) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(dtype=dtype, keepdims=True) + else: + return a.sum(keepdims=True) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_external_sum_all(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return xp.sum(a) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return xp.sum(a, dtype=dtype) + else: + return xp.sum(a) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(rtol=1e-06) def test_sum_all2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40), xp, dtype) - return a.sum() + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(dtype=dtype) + else: + return a.sum() + @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=False) def test_sum_all_transposed(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype).transpose(2, 0, 1) - return a.sum() + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(dtype=dtype) + else: + return a.sum() @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(rtol=1e-06) def test_sum_all_transposed2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40), xp, dtype).transpose(2, 0, 1) - return a.sum() + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(dtype=dtype) + else: + return a.sum() @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_sum_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return a.sum(axis=1) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(dtype=dtype, axis=1) + else: + return a.sum(axis=1) @testing.slow @testing.numpy_cupy_allclose() def test_sum_axis_huge(self, xp): - a = testing.shaped_random((204, 102, 102), xp, 'd') + a = testing.shaped_random((204, 102, 102), xp, 'i4') return a.sum(axis=2) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_external_sum_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return xp.sum(a, axis=1) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return xp.sum(a, axis=1, dtype=dtype) + else: + return xp.sum(a, axis=1) # float16 is omitted, since NumPy's sum on float16 arrays has more error # than CuPy's. 
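The `xp is numpy and dtype == numpy.float32 and has_support_aspect64()` branch is repeated in nearly every sum test above and below. A hypothetical helper (sketch only, not part of this patch; the name `_sum_like_dpnp` is invented here) could factor it out:

```python
import numpy

from tests.helper import has_support_aspect64


def _sum_like_dpnp(a, xp, dtype, **kwargs):
    # Force the NumPy reference to accumulate float32 in float64, mirroring
    # the default upcast the dpnp result gets on devices with fp64 support.
    if xp is numpy and dtype == numpy.float32 and has_support_aspect64():
        kwargs["dtype"] = numpy.float64
    return a.sum(**kwargs)
```

A test such as `test_sum_axes` would then reduce to `return _sum_like_dpnp(a, xp, dtype, axis=(1, 3))`.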
@@ -76,43 +113,71 @@ def test_external_sum_axis(self, xp, dtype): @testing.numpy_cupy_allclose() def test_sum_axis2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40), xp, dtype) - return a.sum(axis=1) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(axis=1, dtype=dtype) + else: + return a.sum(axis=1) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) def test_sum_axis_transposed(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype).transpose(2, 0, 1) - return a.sum(axis=1) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(axis=1, dtype=dtype) + else: + return a.sum(axis=1) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) def test_sum_axis_transposed2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40), xp, dtype).transpose(2, 0, 1) - return a.sum(axis=1) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(axis=1, dtype=dtype) + else: + return a.sum(axis=1) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_sum_axes(self, xp, dtype): a = testing.shaped_arange((2, 3, 4, 5), xp, dtype) - return a.sum(axis=(1, 3)) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(axis=(1, 3), dtype=dtype) + else: + return a.sum(axis=(1, 3)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4) def test_sum_axes2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40, 50), xp, dtype) - return a.sum(axis=(1, 3)) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(axis=(1, 3), dtype=dtype) + else: + return a.sum(axis=(1, 3)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(rtol=1e-6) def test_sum_axes3(self, xp, dtype): a = testing.shaped_arange((2, 3, 4, 5), xp, dtype) - return a.sum(axis=(0, 2, 3)) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(axis=(0, 2, 3), dtype=dtype) + else: + return a.sum(axis=(0, 2, 3)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(rtol=1e-6) def test_sum_axes4(self, xp, dtype): a = testing.shaped_arange((20, 30, 40, 50), xp, dtype) - return a.sum(axis=(0, 2, 3)) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(axis=(0, 2, 3), dtype=dtype) + else: + return a.sum(axis=(0, 2, 3)) @testing.for_all_dtypes_combination(names=['src_dtype', 'dst_dtype']) @testing.numpy_cupy_allclose() @@ -130,7 +195,11 @@ def test_sum_keepdims_and_dtype(self, xp, src_dtype, dst_dtype): @testing.numpy_cupy_allclose() def test_sum_keepdims_multiple_axes(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return a.sum(axis=(1, 2), keepdims=True) + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + dtype=numpy.float64 + return a.sum(axis=(1, 2), keepdims=True, dtype=dtype) + else: + return a.sum(axis=(1, 2), keepdims=True) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() diff --git a/tests/third_party/cupy/statistics_tests/test_correlation.py b/tests/third_party/cupy/statistics_tests/test_correlation.py index b726951373af..b37d2c95b27c 100644 --- a/tests/third_party/cupy/statistics_tests/test_correlation.py +++ b/tests/third_party/cupy/statistics_tests/test_correlation.py @@ -1,9 +1,11 @@ +import sys import unittest import numpy import pytest 
import dpnp as cupy +from dpctl import select_default_device from tests.third_party.cupy import testing @@ -37,9 +39,11 @@ def test_corrcoef_rowvar(self, xp, dtype): return xp.corrcoef(a, y=y, rowvar=False) -@testing.gpu class TestCov(unittest.TestCase): + # resulting dtype will differ with numpy if no fp64 support by a default device + _has_fp64 = select_default_device().has_aspect_fp64 + def generate_input(self, a_shape, y_shape, xp, dtype): a = testing.shaped_arange(a_shape, xp, dtype) y = None @@ -48,27 +52,40 @@ def generate_input(self, a_shape, y_shape, xp, dtype): return a, y @testing.for_all_dtypes() - @testing.numpy_cupy_allclose(type_check=False) + @testing.numpy_cupy_allclose(type_check=_has_fp64, accept_error=True) def check(self, a_shape, y_shape=None, rowvar=True, bias=False, - ddof=None, xp=None, dtype=None): + ddof=None, xp=None, dtype=None, + fweights=None, aweights=None, name=None): a, y = self.generate_input(a_shape, y_shape, xp, dtype) - return xp.cov(a, y, rowvar, bias, ddof) + if fweights is not None: + fweights = name.asarray(fweights) + if aweights is not None: + aweights = name.asarray(aweights) + # print(type(fweights)) + # return xp.cov(a, y, rowvar, bias, ddof, + # fweights, aweights, dtype=dtype) + return xp.cov(a, y, rowvar, bias, ddof, + fweights, aweights) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(accept_error=True) def check_warns(self, a_shape, y_shape=None, rowvar=True, bias=False, - ddof=None, xp=None, dtype=None): + ddof=None, xp=None, dtype=None, + fweights=None, aweights=None): with testing.assert_warns(RuntimeWarning): a, y = self.generate_input(a_shape, y_shape, xp, dtype) - return xp.cov(a, y, rowvar, bias, ddof) + return xp.cov(a, y, rowvar, bias, ddof, + fweights, aweights, dtype=dtype) @testing.for_all_dtypes() - def check_raises(self, a_shape, y_shape=None, rowvar=True, bias=False, - ddof=None, dtype=None): + def check_raises(self, a_shape, y_shape=None, + rowvar=True, bias=False, ddof=None, + dtype=None, fweights=None, aweights=None): for xp in (numpy, cupy): a, y = self.generate_input(a_shape, y_shape, xp, dtype) with pytest.raises(ValueError): - xp.cov(a, y, rowvar, bias, ddof) + xp.cov(a, y, rowvar, bias, ddof, + fweights, aweights, dtype=dtype) @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_cov(self): @@ -78,6 +95,12 @@ def test_cov(self): self.check((2, 3), (2, 3), rowvar=False) self.check((2, 3), bias=True) self.check((2, 3), ddof=2) + self.check((2, 3)) + self.check((1, 3), fweights=(1, 4, 1)) + self.check((1, 3), aweights=(1.0, 4.0, 1.0)) + self.check((1, 3), bias=True, aweights=(1.0, 4.0, 1.0)) + self.check((1, 3), fweights=(1, 4, 1), + aweights=(1.0, 4.0, 1.0)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_cov_warns(self): diff --git a/tests/third_party/cupy/statistics_tests/test_meanvar.py b/tests/third_party/cupy/statistics_tests/test_meanvar.py index 60d3413b0daa..cbe162ba0beb 100644 --- a/tests/third_party/cupy/statistics_tests/test_meanvar.py +++ b/tests/third_party/cupy/statistics_tests/test_meanvar.py @@ -159,7 +159,6 @@ def test_external_mean_all(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) return xp.mean(a) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_mean_axis(self, xp, dtype): @@ -172,21 +171,18 @@ def test_external_mean_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) return xp.mean(a, axis=1) - 
@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes(no_complex=True) - @testing.numpy_cupy_allclose() - def test_mean_all_float64_dtype(self, xp, dtype): + @testing.numpy_cupy_allclose(rtol=1e-06) + def test_mean_all_float32_dtype(self, xp, dtype): a = xp.full((2, 3, 4), 123456789, dtype=dtype) - return xp.mean(a, dtype=numpy.float64) + return xp.mean(a, dtype=numpy.float32) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose() def test_mean_all_int64_dtype(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) return xp.mean(a, dtype=numpy.int64) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_mean_all_complex_dtype(self, xp, dtype): diff --git a/tests/third_party/cupy/testing/__init__.py b/tests/third_party/cupy/testing/__init__.py index 09f30ade6d95..56b5d0529586 100644 --- a/tests/third_party/cupy/testing/__init__.py +++ b/tests/third_party/cupy/testing/__init__.py @@ -7,7 +7,7 @@ from tests.third_party.cupy.testing.array import assert_allclose from tests.third_party.cupy.testing.array import assert_array_almost_equal # from tests.third_party.cupy.testing.array import assert_array_almost_equal_nulp -# from tests.third_party.cupy.testing.array import assert_array_equal +from tests.third_party.cupy.testing.array import assert_array_equal # from tests.third_party.cupy.testing.array import assert_array_less # from tests.third_party.cupy.testing.array import assert_array_list_equal # from tests.third_party.cupy.testing.array import assert_array_max_ulp diff --git a/tests/third_party/cupy/testing/helper.py b/tests/third_party/cupy/testing/helper.py index af8f6e545b29..bfd64d32a586 100644 --- a/tests/third_party/cupy/testing/helper.py +++ b/tests/third_party/cupy/testing/helper.py @@ -3,6 +3,7 @@ import inspect import os import random +from typing import Tuple, Type import traceback import unittest import warnings @@ -18,7 +19,7 @@ from dpctl import select_default_device # import dpnp # import dpnp.scipy.sparse - +from tests.third_party.cupy.testing.attr import is_available def prod(args, init=1): for arg in args: @@ -26,6 +27,23 @@ def prod(args, init=1): return init +if is_available(): + import _pytest.outcomes + _is_pytest_available = True + _skip_classes: Tuple[Type, ...] = ( + unittest.SkipTest, _pytest.outcomes.Skipped) +else: + _is_pytest_available = False + _skip_classes = unittest.SkipTest, + + +def _format_exception(exc): + if exc is None: + return None + # TODO(kataoka): Use traceback.format_exception(exc) in Python 3.10 + return ''.join(traceback.TracebackException.from_exception(exc).format()) + + def _call_func(self, impl, args, kw): try: result = impl(self, *args, **kw) @@ -106,35 +124,46 @@ def _check_numpy_cupy_error_compatible(cupy_error, numpy_error): def _fail_test_with_unexpected_errors( - testcase, msg_format, cupy_error, cupy_tb, numpy_error, numpy_tb): + tb, msg_format, cupy_error, numpy_error): # Fails the test due to unexpected errors raised from the test. 
# msg_format may include format placeholders: - # '{cupy_error}' '{cupy_tb}' '{numpy_error}' '{numpy_tb}' + # '{cupy_error}' '{numpy_error}' msg = msg_format.format( - cupy_error=''.join(str(cupy_error)), - cupy_tb=''.join(traceback.format_tb(cupy_tb)), - numpy_error=''.join(str(numpy_error)), - numpy_tb=''.join(traceback.format_tb(numpy_tb))) + cupy_error=_format_exception(cupy_error), + numpy_error=_format_exception(numpy_error)) # Fail the test with the traceback of the error (for pytest --pdb) - try: - testcase.fail(msg) - except AssertionError as e: - raise e.with_traceback(cupy_tb or numpy_tb) - assert False # never reach + raise AssertionError(msg).with_traceback(tb) -def _check_cupy_numpy_error(self, cupy_error, cupy_tb, numpy_error, - numpy_tb, accept_error=False): +def _check_cupy_numpy_error(cupy_error, numpy_error, + accept_error=False): # Skip the test if both raised SkipTest. - if (isinstance(cupy_error, unittest.SkipTest) - and isinstance(numpy_error, unittest.SkipTest)): + if (isinstance(cupy_error, _skip_classes) + and isinstance(numpy_error, _skip_classes)): + if cupy_error.__class__ is not numpy_error.__class__: + raise AssertionError( + 'Both numpy and cupy were skipped but with different ' + 'exceptions.') if cupy_error.args != numpy_error.args: raise AssertionError( 'Both numpy and cupy were skipped but with different causes.') raise numpy_error # reraise SkipTest + # Check if the error was not raised from test code. + if os.environ.get('CUPY_CI', '') != '' and cupy_error is not None: + frame = traceback.extract_tb(cupy_error.__traceback__)[-1] + filename = os.path.basename(frame.filename) + if filename == 'test_helper.py': + # Allows errors from the test code for testing helpers. + pass + elif filename.startswith('test_'): + _fail_test_with_unexpected_errors( + cupy_error.__traceback__, + 'Error was raised from test code.\n\n{cupy_error}', + cupy_error, None) + # For backward compatibility if accept_error is True: accept_error = Exception @@ -142,44 +171,45 @@ def _check_cupy_numpy_error(self, cupy_error, cupy_tb, numpy_error, accept_error = () # TODO(oktua): expected_regexp like numpy.testing.assert_raises_regex if cupy_error is None and numpy_error is None: - self.fail('Both cupy and numpy are expected to raise errors, but not') + raise AssertionError( + 'Both cupy and numpy are expected to raise errors, but not') elif cupy_error is None: _fail_test_with_unexpected_errors( - self, - 'Only numpy raises error\n\n{numpy_tb}{numpy_error}', - None, None, numpy_error, numpy_tb) + numpy_error.__traceback__, + 'Only numpy raises error\n\n{numpy_error}', + None, numpy_error) elif numpy_error is None: _fail_test_with_unexpected_errors( - self, - 'Only cupy raises error\n\n{cupy_tb}{cupy_error}', - cupy_error, cupy_tb, None, None) + cupy_error.__traceback__, + 'Only cupy raises error\n\n{cupy_error}', + cupy_error, None) elif not _check_numpy_cupy_error_compatible(cupy_error, numpy_error): _fail_test_with_unexpected_errors( - self, + cupy_error.__traceback__, '''Different types of errors occurred cupy -{cupy_tb}{cupy_error} +{cupy_error} numpy -{numpy_tb}{numpy_error} +{numpy_error} ''', - cupy_error, cupy_tb, numpy_error, numpy_tb) + cupy_error, numpy_error) elif not (isinstance(cupy_error, accept_error) and isinstance(numpy_error, accept_error)): _fail_test_with_unexpected_errors( - self, + cupy_error.__traceback__, '''Both cupy and numpy raise exceptions cupy -{cupy_tb}{cupy_error} +{cupy_error} numpy -{numpy_tb}{numpy_error} +{numpy_error} ''', - cupy_error, cupy_tb, 
numpy_error, numpy_tb) + cupy_error, numpy_error) def _make_positive_mask(self, impl, args, kw, name, sp_name, scipy_name): @@ -230,8 +260,8 @@ def test_func(self, *args, **kw): # Check errors raised if cupy_error or numpy_error: - _check_cupy_numpy_error(self, cupy_error, cupy_tb, - numpy_error, numpy_tb, + _check_cupy_numpy_error(cupy_error, + numpy_error, accept_error=accept_error) return @@ -630,8 +660,8 @@ def test_func(self, *args, **kw): _call_func_numpy_cupy( self, impl, args, kw, name, sp_name, scipy_name)) - _check_cupy_numpy_error(self, cupy_error, cupy_tb, - numpy_error, numpy_tb, + _check_cupy_numpy_error(cupy_error, + numpy_error, accept_error=accept_error) return test_func return decorator @@ -1164,6 +1194,7 @@ def shaped_random(shape, xp=dpnp, dtype=numpy.float64, scale=10, seed=0): """ numpy.random.seed(seed) dtype = numpy.dtype(dtype) + if dtype == '?': return xp.asarray(numpy.random.randint(2, size=shape), dtype=dtype) elif dtype.kind == 'c':
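Reviewer note (not part of the patch): the helper.py changes above drop the separate (error, traceback) pairs and instead format each caught exception from its own __traceback__ via traceback.TracebackException. A self-contained sketch of that formatting step, with an illustrative try/except driver that is not taken from the diff:

import traceback


def format_exception(exc):
    # Same approach as _format_exception in testing/helper.py: return the
    # full "Traceback (most recent call last): ..." text for the exception,
    # or None when no exception was raised on that side.
    if exc is None:
        return None
    return ''.join(traceback.TracebackException.from_exception(exc).format())


try:
    1 / 0
except ZeroDivisionError as exc:
    # Prints the formatted traceback plus the exception message, which is
    # what the rewritten _check_cupy_numpy_error interpolates into msg_format.
    print(format_exception(exc))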