diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 07bd44c8fd10..da29bf31dd09 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -29,11 +29,8 @@ jobs:
 
     strategy:
       matrix:
-        python: ['3.8', '3.9']
+        python: ['3.8', '3.9', '3.10']
         os: [ubuntu-20.04, windows-latest]
-        include:
-          - python: '3.10'
-            os: ubuntu-20.04
 
     runs-on: ${{ matrix.os }}
 
@@ -110,7 +107,7 @@ jobs:
 
     strategy:
       matrix:
-        python: ['3.8', '3.9']
+        python: ['3.8', '3.9', '3.10']
         os: [ubuntu-20.04, ubuntu-latest]
 
         experimental: [false]
@@ -215,7 +212,7 @@ jobs:
 
     strategy:
       matrix:
-        python: ['3.8', '3.9']
+        python: ['3.8', '3.9', '3.10']
         experimental: [false]
 
     continue-on-error: ${{ matrix.experimental }}
@@ -384,7 +381,7 @@ jobs:
 
     strategy:
       matrix:
-        python: ['3.8', '3.9']
+        python: ['3.8', '3.9', '3.10']
         os: [ubuntu-20.04, windows-latest]
 
     runs-on: ${{ matrix.os }}
diff --git a/0.build.sh b/0.build.sh
index 380214e6802c..b1a2a29ec0ae 100755
--- a/0.build.sh
+++ b/0.build.sh
@@ -4,6 +4,14 @@ THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]}))
 # . ${THEDIR}/0.env.sh
 cd ${THEDIR}
 
+# Point the $TMP env variable at the directory where this script is located.
+# The compiler uses this variable as the path to its temporary folder,
+# where it stores temporary files generated during the compilation and linking phases.
+# By default the compiler uses the /tmp folder, but that is limited in size and
+# there might not be enough space to temporarily keep all the generated data.
+export TMP=${THEDIR}
+
+
 export DPNP_DEBUG=1
 
 python setup.py clean
@@ -17,7 +25,8 @@ CC=icpx python setup.py build_ext --inplace
 
 echo
 echo =========example3==============
-icpx -fsycl -g -fPIC dpnp/backend/examples/example3.cpp -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3
+DPCTL_INCLUDES=$(python -m dpctl --includes)
+icpx -fsycl -g -O0 -ggdb3 -fPIC dpnp/backend/examples/example3.cpp $DPCTL_INCLUDES -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3
 # LD_DEBUG=libs,bindings,symbols ./example3
 ./example3
 
@@ -39,7 +48,7 @@ icpx -fsycl -g -fPIC dpnp/backend/examples/example3.cpp -Idpnp -Idpnp/backend/in
 # strings /usr/share/miniconda/envs/dpnp*/lib/libstdc++.so | grep GLIBCXX | sort -n
 
 
-# echo
+echo
 echo =========example1==============
 # LD_DEBUG=libs,bindings,symbols python examples/example1.py
 # LD_DEBUG=libs python examples/example1.py
diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh
index d873320f80f8..164ad09d578f 100644
--- a/conda-recipe/build.sh
+++ b/conda-recipe/build.sh
@@ -29,6 +29,11 @@ fi
 export CFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $CFLAGS"
 export LDFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $LDFLAGS"
 
+# Intel LLVM must cooperate with compiler and sysroot from conda
+echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg
+export ICPXCFG="$(pwd)/icpx_for_conda.cfg"
+export ICXCFG="$(pwd)/icpx_for_conda.cfg"
+
 $PYTHON setup.py build_clib
 $PYTHON setup.py build_ext install
 
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index b384776d2607..0c6e38f667db 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -11,17 +11,17 @@ requirements:
       - numpy 1.19
       - cython
       - cmake >=3.19
-      - dpctl >=0.13
-      - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2021.1.1') }}
+      - dpctl >=0.14
+      - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.0.0') }}
       - onedpl-devel
       - tbb-devel
       - wheel
     build:
       - {{ compiler('cxx') }}
-      - {{ compiler('dpcpp') }}  >=2022.1  # [not osx]
+      - {{ compiler('dpcpp') }}  >=2023.0  # [not osx]
     run:
       - python
-      - dpctl >=0.13
+      - dpctl >=0.14
       - {{ pin_compatible('dpcpp-cpp-rt', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('mkl-dpcpp', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('numpy', min_pin='x.x', max_pin='x') }}
diff --git a/doc/conf.py b/doc/conf.py
index 46505fa8f6db..999b2504bd64 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -33,7 +33,7 @@
 # The short X.Y version
 version = '0.11'
 # The full version, including alpha/beta/rc tags
-release = '0.11.0'
+release = '0.11.1'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 1714124cf85a..baee709b11ee 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -1,5 +1,5 @@
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,7 @@
 
 cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
 
-# set(DPNP_VERSION 0.11.0)
+# set(DPNP_VERSION 0.11.1)
 # set(DPNP_API_VERSION 0.11)
 
 # set directory where the custom finders live
@@ -93,6 +93,7 @@ string(CONCAT COMMON_COMPILE_FLAGS
   "-fsycl "
   "-fsycl-device-code-split=per_kernel "
   "-fno-approx-func "
+  "-fno-finite-math-only "
 )
 string(CONCAT COMMON_LINK_FLAGS
   "-fsycl "
@@ -111,7 +112,7 @@ elseif(WIN32)
   # set(CMAKE_RANLIB "llvm-ranlib")
   # set(CMAKE_CXX_FLAGS "/EHsc")
 
-  string(APPEND COMMON_COMPILER_FLAGS
+  string(APPEND COMMON_COMPILE_FLAGS
     "/EHsc "
 #    "/Ox "
 #    "/W3 "
@@ -133,23 +134,29 @@ string(CONCAT DPNP_WARNING_FLAGS
   "-Wextra "
   "-Wshadow "
   "-Wall "
-  "-Wstring-prototypes "
+  "-Wstrict-prototypes "
   "-Wformat "
   "-Wformat-security "
 )
-string(APPEND COMMON_COMPILER_FLAGS
+string(APPEND COMMON_COMPILE_FLAGS
   "${DPNP_WARNING_FLAGS}"
 )
 
 # debug/release compile definitions
 if(DPNP_DEBUG_ENABLE)
   set(CMAKE_BUILD_TYPE "Debug")
-  string(APPEND COMMON_COMPILER_FLAGS
+  string(APPEND COMMON_COMPILE_FLAGS
     "-O0 "
+    "-ggdb3 "
+  )
+  string(APPEND COMMON_LINK_FLAGS
+    "-O0 "
+    "-ggdb3 "
+    "-fsycl-link-huge-device-code "
   )
 else()
   set(CMAKE_BUILD_TYPE "Release")
-  string(APPEND COMMON_COMPILER_FLAGS
+  string(APPEND COMMON_COMPILE_FLAGS
     "-O3 "
   )
 endif()
@@ -162,7 +169,7 @@ string(CONCAT DPNP_DEFS
   "-D_FORTIFY_SOURCE=2 "
 )
 if(NOT WIN32)
-  string(APPEND COMMON_COMPILER_FLAGS
+  string(APPEND COMMON_COMPILE_FLAGS
     "-fno-delete-null-pointer-checks "
     "-fstack-protector-strong "
     "-fno-strict-overflow "
diff --git a/dpnp/backend/doc/Doxyfile b/dpnp/backend/doc/Doxyfile
index 6c83bb0e8465..3d6c971a7991 100644
--- a/dpnp/backend/doc/Doxyfile
+++ b/dpnp/backend/doc/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "DPNP C++ backend kernel library"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 0.11.0
+PROJECT_NUMBER         = 0.11.1
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
index f5ee23d755f2..0f6cb5b31deb 100644
--- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -23,6 +23,8 @@
 // THE POSSIBILITY OF SUCH DAMAGE.
 //*****************************************************************************
 
+#if defined(MACRO_1ARG_1TYPE_OP)
+
 /*
  * This header file contains single argument element wise functions definitions
  *
@@ -35,10 +37,6 @@
  *
  */
 
-#ifndef MACRO_1ARG_1TYPE_OP
-#error "MACRO_1ARG_1TYPE_OP is not defined"
-#endif
-
 #ifdef _SECTION_DOCUMENTATION_GENERATION_
 
 #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__)                                                   \
@@ -88,7 +86,7 @@
                   const shape_elem_type* input1_strides,                                                                \
                   const size_t* where);
 
-#endif
+#endif // _SECTION_DOCUMENTATION_GENERATION_
 
 MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), q.submit(kernel_func))
 MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, q.submit(kernel_func))
@@ -107,3 +105,62 @@ MACRO_1ARG_1TYPE_OP(dpnp_square_c,
                     oneapi::mkl::vm::sqr(q, input1_size, input1_data, result))
 
 #undef MACRO_1ARG_1TYPE_OP
+
+#elif defined(MACRO_1ARG_1TYPE_LOGIC_OP)
+
+/*
+ * This header file contains single argument element wise functions definitions
+ *
+ * Macro `MACRO_1ARG_1TYPE_LOGIC_OP` must be defined before usage
+ *
+ * Parameters:
+ * - public name of the function and kernel name
+ * - operation used to calculate the result
+ *
+ */
+
+#ifdef _SECTION_DOCUMENTATION_GENERATION_
+
+#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__)                                                             \
+    /** @ingroup BACKEND_API                                                                                         */ \
+    /** @brief Per element operation function __name__                                                               */ \
+    /**                                                                                                              */ \
+    /** Function "__name__" executes operator "__operation__" over corresponding elements of input array             */ \
+    /**                                                                                                              */ \
+    /** @param[in]  q_ref              Reference to SYCL queue.                                                      */ \
+    /** @param[out] result_out         Output array.                                                                 */ \
+    /** @param[in]  result_size        Output array size.                                                            */ \
+    /** @param[in]  result_ndim        Number of output array dimensions.                                            */ \
+    /** @param[in]  result_shape       Output array shape.                                                           */ \
+    /** @param[in]  result_strides     Output array strides.                                                         */ \
+    /** @param[in]  input1_in          Input array 1.                                                                */ \
+    /** @param[in]  input1_size        Input array 1 size.                                                           */ \
+    /** @param[in]  input1_ndim        Number of input array 1 dimensions.                                           */ \
+    /** @param[in]  input1_shape       Input array 1 shape.                                                          */ \
+    /** @param[in]  input1_strides     Input array 1 strides.                                                        */ \
+    /** @param[in]  where              Where condition.                                                              */ \
+    /** @param[in]  dep_event_vec_ref  Reference to vector of SYCL events.                                           */ \
+    template <typename _DataType_input1>                                                                                \
+    DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                 \
+                               void* result_out,                                                                        \
+                               const size_t result_size,                                                                \
+                               const size_t result_ndim,                                                                \
+                               const shape_elem_type* result_shape,                                                     \
+                               const shape_elem_type* result_strides,                                                   \
+                               const void* input1_in,                                                                   \
+                               const size_t input1_size,                                                                \
+                               const size_t input1_ndim,                                                                \
+                               const shape_elem_type* input1_shape,                                                     \
+                               const shape_elem_type* input1_strides,                                                   \
+                               const size_t* where,                                                                     \
+                               const DPCTLEventVectorRef dep_event_vec_ref);
+
+#endif // _SECTION_DOCUMENTATION_GENERATION_
+
+MACRO_1ARG_1TYPE_LOGIC_OP(dpnp_logical_not_c, !input1_elem)
+
+#undef MACRO_1ARG_1TYPE_LOGIC_OP
+
+#else
+#error "MACRO_1ARG_1TYPE_OP or MACRO_1ARG_1TYPE_LOGIC_OP is not defined"
+#endif // MACRO_1ARG_1TYPE_OP || MACRO_1ARG_1TYPE_LOGIC_OP
diff --git a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp
new file mode 100644
index 000000000000..4b6c4290ef31
--- /dev/null
+++ b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp
@@ -0,0 +1,99 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+/*
+ * This header file contains two-argument element-wise function definitions
+ *
+ * Macro `MACRO_2ARG_2TYPES_LOGIC_OP` must be defined before usage
+ *
+ * Parameters:
+ * - public name of the function and kernel name
+ * - operation used to calculate the result
+ *
+ */
+
+#ifndef MACRO_2ARG_2TYPES_LOGIC_OP
+#error "MACRO_2ARG_2TYPES_LOGIC_OP is not defined"
+#endif
+
+#ifdef _SECTION_DOCUMENTATION_GENERATION_
+
+#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__)                                                             \
+    /** @ingroup BACKEND_API                                                                                         */ \
+    /** @brief Per element operation function __name__                                                               */ \
+    /**                                                                                                              */ \
+    /** Function "__name__" executes operator "__operation__" over corresponding elements of input arrays            */ \
+    /**                                                                                                              */ \
+    /** @param[in]  q_ref              Reference to SYCL queue.                                                      */ \
+    /** @param[out] result_out         Output array.                                                                 */ \
+    /** @param[in]  result_size        Output array size.                                                            */ \
+    /** @param[in]  result_ndim        Number of output array dimensions.                                            */ \
+    /** @param[in]  result_shape       Output array shape.                                                           */ \
+    /** @param[in]  result_strides     Output array strides.                                                         */ \
+    /** @param[in]  input1_in          Input array 1.                                                                */ \
+    /** @param[in]  input1_size        Input array 1 size.                                                           */ \
+    /** @param[in]  input1_ndim        Number of input array 1 dimensions.                                           */ \
+    /** @param[in]  input1_shape       Input array 1 shape.                                                          */ \
+    /** @param[in]  input1_strides     Input array 1 strides.                                                        */ \
+    /** @param[in]  input2_in          Input array 2.                                                                */ \
+    /** @param[in]  input2_size        Input array 2 size.                                                           */ \
+    /** @param[in]  input2_ndim        Number of input array 2 dimensions.                                           */ \
+    /** @param[in]  input2_shape       Input array 2 shape.                                                          */ \
+    /** @param[in]  input2_strides     Input array 2 strides.                                                        */ \
+    /** @param[in]  where              Where condition.                                                              */ \
+    /** @param[in]  dep_event_vec_ref  Reference to vector of SYCL events.                                           */ \
+    template <typename _DataType_input1, typename _DataType_input2>                                                     \
+    DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                 \
+                               void* result_out,                                                                        \
+                               const size_t result_size,                                                                \
+                               const size_t result_ndim,                                                                \
+                               const shape_elem_type* result_shape,                                                     \
+                               const shape_elem_type* result_strides,                                                   \
+                               const void* input1_in,                                                                   \
+                               const size_t input1_size,                                                                \
+                               const size_t input1_ndim,                                                                \
+                               const shape_elem_type* input1_shape,                                                     \
+                               const shape_elem_type* input1_strides,                                                   \
+                               const void* input2_in,                                                                   \
+                               const size_t input2_size,                                                                \
+                               const size_t input2_ndim,                                                                \
+                               const shape_elem_type* input2_shape,                                                     \
+                               const shape_elem_type* input2_strides,                                                   \
+                               const size_t* where,                                                                     \
+                               const DPCTLEventVectorRef dep_event_vec_ref);
+
+#endif
+
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_equal_c, input1_elem == input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_c, input1_elem > input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_equal_c, input1_elem >= input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_c, input1_elem < input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_equal_c, input1_elem <= input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_and_c, input1_elem && input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_or_c, input1_elem || input2_elem)
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_xor_c, (!!input1_elem) != (!!input2_elem))
+MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_not_equal_c, input1_elem != input2_elem)
+
+#undef MACRO_2ARG_2TYPES_LOGIC_OP
diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
index 5d4ae22f796f..e345c6eefea7 100644
--- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,10 @@
  * Parameters:
  * - public name of the function and kernel name
  * - operation used to calculate the result
+ * - vector operation over SYCL group used to calculate the result
+ * - list of types vector operation accepts
  * - mkl operation used to calculate the result
+ * - list of types mkl operation accepts
  *
  */
 
@@ -41,11 +44,12 @@
 
 #ifdef _SECTION_DOCUMENTATION_GENERATION_
 
-#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__)                                                  \
+#define MACRO_2ARG_3TYPES_OP(                                                                                           \
+    __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__)                        \
     /** @ingroup BACKEND_API                                                                                         */ \
     /** @brief Per element operation function __name__                                                               */ \
     /**                                                                                                              */ \
-    /** Function "__name__" executes operator "__operation1__" over corresponding elements of input arrays           */ \
+    /** Function "__name__" executes operator "__operation__" over corresponding elements of input arrays            */ \
     /**                                                                                                              */ \
     /** @param[in]  q_ref              Reference to SYCL queue.                                                      */ \
     /** @param[out] result_out         Output array.                                                                 */ \
@@ -105,23 +109,84 @@
 
 #endif
 
-MACRO_2ARG_3TYPES_OP(dpnp_add_c, input1_elem + input2_elem, oneapi::mkl::vm::add)
-MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c, sycl::atan2((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::atan2)
+MACRO_2ARG_3TYPES_OP(dpnp_add_c,
+                     input1_elem + input2_elem,
+                     x1 + x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
+                     oneapi::mkl::vm::add,
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
+
+MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c,
+                     sycl::atan2((double)input1_elem, (double)input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::atan2,
+                     MACRO_UNPACK_TYPES(float, double))
+
 MACRO_2ARG_3TYPES_OP(dpnp_copysign_c,
                      sycl::copysign((double)input1_elem, (double)input2_elem),
-                     oneapi::mkl::vm::copysign)
-MACRO_2ARG_3TYPES_OP(dpnp_divide_c, input1_elem / input2_elem, oneapi::mkl::vm::div)
-MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, sycl::fmod((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::fmod)
-MACRO_2ARG_3TYPES_OP(dpnp_hypot_c, sycl::hypot((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::hypot)
-MACRO_2ARG_3TYPES_OP(dpnp_maximum_c, sycl::max(input1_elem, input2_elem), oneapi::mkl::vm::fmax)
-MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, sycl::min(input1_elem, input2_elem), oneapi::mkl::vm::fmin)
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::copysign,
+                     MACRO_UNPACK_TYPES(float, double))
+
+MACRO_2ARG_3TYPES_OP(dpnp_divide_c,
+                     input1_elem / input2_elem,
+                     x1 / x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
+                     oneapi::mkl::vm::div,
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
+
+MACRO_2ARG_3TYPES_OP(dpnp_fmod_c,
+                     sycl::fmod((double)input1_elem, (double)input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::fmod,
+                     MACRO_UNPACK_TYPES(float, double))
+
+MACRO_2ARG_3TYPES_OP(dpnp_hypot_c,
+                     sycl::hypot((double)input1_elem, (double)input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::hypot,
+                     MACRO_UNPACK_TYPES(float, double))
+
+MACRO_2ARG_3TYPES_OP(dpnp_maximum_c,
+                     sycl::max(input1_elem, input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::fmax,
+                     MACRO_UNPACK_TYPES(float, double))
+
+MACRO_2ARG_3TYPES_OP(dpnp_minimum_c,
+                     sycl::min(input1_elem, input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::fmin,
+                     MACRO_UNPACK_TYPES(float, double))
 
 // "multiply" needs to be standalone kernel (not autogenerated) due to complex algorithm. This is not an element wise.
 // pytest "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3"
 // requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10]
-MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, input1_elem* input2_elem, oneapi::mkl::vm::mul)
+MACRO_2ARG_3TYPES_OP(dpnp_multiply_c,
+                     input1_elem * input2_elem,
+                     x1 * x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
+                     oneapi::mkl::vm::mul,
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
+
+MACRO_2ARG_3TYPES_OP(dpnp_power_c,
+                     sycl::pow((double)input1_elem, (double)input2_elem),
+                     nullptr,
+                     std::false_type,
+                     oneapi::mkl::vm::pow,
+                     MACRO_UNPACK_TYPES(float, double))
 
-MACRO_2ARG_3TYPES_OP(dpnp_power_c, sycl::pow((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::pow)
-MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, input1_elem - input2_elem, oneapi::mkl::vm::sub)
+MACRO_2ARG_3TYPES_OP(dpnp_subtract_c,
+                     input1_elem - input2_elem,
+                     x1 - x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
+                     oneapi::mkl::vm::sub,
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
 
 #undef MACRO_2ARG_3TYPES_OP
diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp
index 42c05f0fd61d..7a80b40a3d2e 100644
--- a/dpnp/backend/include/dpnp_iface.hpp
+++ b/dpnp/backend/include/dpnp_iface.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,7 @@ INP_DLLEXPORT void dpnp_queue_initialize_c(QueueOptions selector = QueueOptions:
  * @ingroup BACKEND_API
  * @brief SYCL queue device status.
  *
- * Return 1 if current @ref queue is related to cpu or host device. return 0 otherwise.
+ * Return 1 if the current @ref queue is related to a CPU device. Return 0 otherwise.
  */
 INP_DLLEXPORT size_t dpnp_queue_is_cpu_c();
 
@@ -1806,7 +1806,31 @@ INP_DLLEXPORT void dpnp_invert_c(void* array1_in, void* result, size_t size);
 
 #include <dpnp_gen_1arg_2type_tbl.hpp>
 
-#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__)                                                 \
+#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__)                                                           \
+    template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>                         \
+    INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                  \
+                                             void* result_out,                                                         \
+                                             const size_t result_size,                                                 \
+                                             const size_t result_ndim,                                                 \
+                                             const shape_elem_type* result_shape,                                      \
+                                             const shape_elem_type* result_strides,                                    \
+                                             const void* input1_in,                                                    \
+                                             const size_t input1_size,                                                 \
+                                             const size_t input1_ndim,                                                 \
+                                             const shape_elem_type* input1_shape,                                      \
+                                             const shape_elem_type* input1_strides,                                    \
+                                             const void* input2_in,                                                    \
+                                             const size_t input2_size,                                                 \
+                                             const size_t input2_ndim,                                                 \
+                                             const shape_elem_type* input2_shape,                                      \
+                                             const shape_elem_type* input2_strides,                                    \
+                                             const size_t* where,                                                      \
+                                             const DPCTLEventVectorRef dep_event_vec_ref);
+
+#include <dpnp_gen_2arg_2type_tbl.hpp>
+
+#define MACRO_2ARG_3TYPES_OP(                                                                                          \
+    __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__)                       \
     template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>                         \
     INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                  \
                                              void* result_out,                                                         \
diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
index 8e209d38317a..fb154fcabfac 100644
--- a/dpnp/backend/include/dpnp_iface_fptr.hpp
+++ b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -151,10 +151,10 @@ enum class DPNPFuncName : size_t
     DPNP_FN_EIG_EXT,                      /**< Used in numpy.linalg.eig() impl, requires extra parameters */
     DPNP_FN_EIGVALS,                      /**< Used in numpy.linalg.eigvals() impl  */
     DPNP_FN_EIGVALS_EXT,                  /**< Used in numpy.linalg.eigvals() impl, requires extra parameters */
+    DPNP_FN_EQUAL_EXT,                    /**< Used in numpy.equal() impl, requires extra parameters */
     DPNP_FN_ERF,                          /**< Used in scipy.special.erf impl  */
     DPNP_FN_ERF_EXT,                      /**< Used in scipy.special.erf impl, requires extra parameters */
     DPNP_FN_EYE,                          /**< Used in numpy.eye() impl  */
-    DPNP_FN_EYE_EXT,                      /**< Used in numpy.eye() impl, requires extra parameters */
     DPNP_FN_EXP,                          /**< Used in numpy.exp() impl  */
     DPNP_FN_EXP_EXT,                      /**< Used in numpy.exp() impl, requires extra parameters */
     DPNP_FN_EXP2,                         /**< Used in numpy.exp2() impl  */
@@ -179,6 +179,8 @@ enum class DPNPFuncName : size_t
     DPNP_FN_FMOD_EXT,                     /**< Used in numpy.fmod() impl, requires extra parameters  */
     DPNP_FN_FULL,                         /**< Used in numpy.full() impl  */
     DPNP_FN_FULL_LIKE,                    /**< Used in numpy.full_like() impl  */
+    DPNP_FN_GREATER_EXT,                  /**< Used in numpy.greater() impl, requires extra parameters */
+    DPNP_FN_GREATER_EQUAL_EXT,            /**< Used in numpy.greater_equal() impl, requires extra parameters */
     DPNP_FN_HYPOT,                        /**< Used in numpy.hypot() impl  */
     DPNP_FN_HYPOT_EXT,                    /**< Used in numpy.hypot() impl, requires extra parameters  */
     DPNP_FN_IDENTITY,                     /**< Used in numpy.identity() impl  */
@@ -193,6 +195,8 @@ enum class DPNPFuncName : size_t
     DPNP_FN_KRON_EXT,                     /**< Used in numpy.kron() impl, requires extra parameters  */
     DPNP_FN_LEFT_SHIFT,                   /**< Used in numpy.left_shift() impl  */
     DPNP_FN_LEFT_SHIFT_EXT,               /**< Used in numpy.left_shift() impl, requires extra parameters  */
+    DPNP_FN_LESS_EXT,                     /**< Used in numpy.less() impl, requires extra parameters */
+    DPNP_FN_LESS_EQUAL_EXT,               /**< Used in numpy.less_equal() impl, requires extra parameters */
     DPNP_FN_LOG,                          /**< Used in numpy.log() impl  */
     DPNP_FN_LOG_EXT,                      /**< Used in numpy.log() impl, requires extra parameters  */
     DPNP_FN_LOG10,                        /**< Used in numpy.log10() impl  */
@@ -201,6 +205,10 @@ enum class DPNPFuncName : size_t
     DPNP_FN_LOG2_EXT,                     /**< Used in numpy.log2() impl, requires extra parameters  */
     DPNP_FN_LOG1P,                        /**< Used in numpy.log1p() impl  */
     DPNP_FN_LOG1P_EXT,                    /**< Used in numpy.log1p() impl, requires extra parameters  */
+    DPNP_FN_LOGICAL_AND_EXT,              /**< Used in numpy.logical_and() impl, requires extra parameters */
+    DPNP_FN_LOGICAL_NOT_EXT,              /**< Used in numpy.logical_not() impl, requires extra parameters */
+    DPNP_FN_LOGICAL_OR_EXT,               /**< Used in numpy.logical_or() impl, requires extra parameters */
+    DPNP_FN_LOGICAL_XOR_EXT,              /**< Used in numpy.logical_xor() impl, requires extra parameters */
     DPNP_FN_MATMUL,                       /**< Used in numpy.matmul() impl  */
     DPNP_FN_MATMUL_EXT,                   /**< Used in numpy.matmul() impl, requires extra parameters */
     DPNP_FN_MATRIX_RANK,                  /**< Used in numpy.linalg.matrix_rank() impl  */
@@ -227,6 +235,7 @@ enum class DPNPFuncName : size_t
     DPNP_FN_NEGATIVE_EXT,                 /**< Used in numpy.negative() impl, requires extra parameters */
     DPNP_FN_NONZERO,                      /**< Used in numpy.nonzero() impl  */
     DPNP_FN_NONZERO_EXT,                  /**< Used in numpy.nonzero() impl, requires extra parameters */
+    DPNP_FN_NOT_EQUAL_EXT,                /**< Used in numpy.not_equal() impl, requires extra parameters */
     DPNP_FN_ONES,                         /**< Used in numpy.ones() impl */
     DPNP_FN_ONES_LIKE,                    /**< Used in numpy.ones_like() impl */
     DPNP_FN_PARTITION,                    /**< Used in numpy.partition() impl */
@@ -361,9 +370,7 @@ enum class DPNPFuncName : size_t
     DPNP_FN_TRI,                          /**< Used in numpy.tri() impl  */
     DPNP_FN_TRI_EXT,                      /**< Used in numpy.tri() impl, requires extra parameters */
     DPNP_FN_TRIL,                         /**< Used in numpy.tril() impl  */
-    DPNP_FN_TRIL_EXT,                     /**< Used in numpy.tril() impl, requires extra parameters */
     DPNP_FN_TRIU,                         /**< Used in numpy.triu() impl  */
-    DPNP_FN_TRIU_EXT,                     /**< Used in numpy.triu() impl, requires extra parameters */
     DPNP_FN_TRUNC,                        /**< Used in numpy.trunc() impl  */
     DPNP_FN_TRUNC_EXT,                    /**< Used in numpy.trunc() impl, requires extra parameters */
     DPNP_FN_VANDER,                       /**< Used in numpy.vander() impl  */
@@ -385,13 +392,13 @@ enum class DPNPFuncName : size_t
 enum class DPNPFuncType : size_t
 {
     DPNP_FT_NONE,     /**< Very first element of the enumeration */
+    DPNP_FT_BOOL,     /**< analog of numpy.bool_ or bool */
     DPNP_FT_INT,      /**< analog of numpy.int32 or int */
     DPNP_FT_LONG,     /**< analog of numpy.int64 or long */
     DPNP_FT_FLOAT,    /**< analog of numpy.float32 or float */
-    DPNP_FT_DOUBLE,   /**< analog of numpy.float32 or double */
+    DPNP_FT_DOUBLE,   /**< analog of numpy.float64 or double */
     DPNP_FT_CMPLX64,  /**< analog of numpy.complex64 or std::complex<float> */
-    DPNP_FT_CMPLX128, /**< analog of numpy.complex128 or std::complex<double> */
-    DPNP_FT_BOOL      /**< analog of numpy.bool or numpy.bool_ or bool */
+    DPNP_FT_CMPLX128  /**< analog of numpy.complex128 or std::complex<double> */
 };
 
 /**
@@ -410,8 +417,26 @@ size_t operator-(DPNPFuncType lhs, DPNPFuncType rhs);
  */
 typedef struct DPNPFuncData
 {
-    DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */
-    void* ptr;                /**< C++ backend function pointer */
+    DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr, const DPNPFuncType type_no_fp64, void* ptr_no_fp64)
+        : return_type(gen_type)
+        , ptr(gen_ptr)
+        , return_type_no_fp64(type_no_fp64)
+        , ptr_no_fp64(ptr_no_fp64)
+    {
+    }
+    DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr)
+        : DPNPFuncData(gen_type, gen_ptr, DPNPFuncType::DPNP_FT_NONE, nullptr)
+    {
+    }
+    DPNPFuncData()
+        : DPNPFuncData(DPNPFuncType::DPNP_FT_NONE, nullptr)
+    {
+    }
+
+    DPNPFuncType return_type;         /**< return type identifier which expected by the @ref ptr function */
+    void* ptr;                        /**< C++ backend function pointer */
+    DPNPFuncType return_type_no_fp64; /**< alternative return type identifier when no fp64 support by device */
+    void* ptr_no_fp64;                /**< alternative C++ backend function pointer when no fp64 support by device */
 } DPNPFuncData_t;
 
 /**
diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
index 71d93842feb0..a29fcca0975b 100644
--- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -292,13 +292,6 @@ void dpnp_eye_c(void* result1, int k, const shape_elem_type* res_shape)
 template <typename _DataType>
 void (*dpnp_eye_default_c)(void*, int, const shape_elem_type*) = dpnp_eye_c<_DataType>;
 
-template <typename _DataType>
-DPCTLSyclEventRef (*dpnp_eye_ext_c)(DPCTLSyclQueueRef,
-                                    void*,
-                                    int,
-                                    const shape_elem_type*,
-                                    const DPCTLEventVectorRef) = dpnp_eye_c<_DataType>;
-
 template <typename _DataType>
 DPCTLSyclEventRef dpnp_full_c(DPCTLSyclQueueRef q_ref,
                               void* array_in,
@@ -1062,17 +1055,6 @@ void (*dpnp_tril_default_c)(void*,
                             const size_t,
                             const size_t) = dpnp_tril_c<_DataType>;
 
-template <typename _DataType>
-DPCTLSyclEventRef (*dpnp_tril_ext_c)(DPCTLSyclQueueRef,
-                                     void*,
-                                     void*,
-                                     const int,
-                                     shape_elem_type*,
-                                     shape_elem_type*,
-                                     const size_t,
-                                     const size_t,
-                                     const DPCTLEventVectorRef) = dpnp_tril_c<_DataType>;
-
 template <typename _DataType>
 DPCTLSyclEventRef dpnp_triu_c(DPCTLSyclQueueRef q_ref,
                               void* array_in,
@@ -1225,17 +1207,6 @@ void (*dpnp_triu_default_c)(void*,
                             const size_t,
                             const size_t) = dpnp_triu_c<_DataType>;
 
-template <typename _DataType>
-DPCTLSyclEventRef (*dpnp_triu_ext_c)(DPCTLSyclQueueRef,
-                                     void*,
-                                     void*,
-                                     const int,
-                                     shape_elem_type*,
-                                     shape_elem_type*,
-                                     const size_t,
-                                     const size_t,
-                                     const DPCTLEventVectorRef) = dpnp_triu_c<_DataType>;
-
 template <typename _DataType>
 DPCTLSyclEventRef dpnp_zeros_c(DPCTLSyclQueueRef q_ref,
                                void* result,
@@ -1319,11 +1290,6 @@ void func_map_init_arraycreation(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_EYE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eye_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_EYE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eye_default_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_eye_ext_c<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_eye_ext_c<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eye_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eye_ext_c<double>};
-
     fmap[DPNPFuncName::DPNP_FN_FULL][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_full_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_FULL][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_full_default_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_FULL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_full_default_c<float>};
@@ -1451,21 +1417,11 @@ void func_map_init_arraycreation(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_TRIL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_tril_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_TRIL][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_tril_default_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_tril_ext_c<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_tril_ext_c<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_tril_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_tril_ext_c<double>};
-
     fmap[DPNPFuncName::DPNP_FN_TRIU][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_triu_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_TRIU][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_triu_default_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_TRIU][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_triu_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_TRIU][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_triu_default_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_triu_ext_c<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_triu_ext_c<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_triu_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_triu_ext_c<double>};
-
     fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_zeros_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_zeros_default_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_zeros_default_c<float>};
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index eafa50d4cee2..5133473d3935 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -825,7 +825,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     return;
 }
 
-#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__)                                                 \
+
+#define MACRO_2ARG_3TYPES_OP(                                                                                          \
+    __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__)                       \
     template <typename _KernelNameSpecialization1,                                                                     \
               typename _KernelNameSpecialization2,                                                                     \
               typename _KernelNameSpecialization3>                                                                     \
@@ -834,6 +836,11 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     template <typename _KernelNameSpecialization1,                                                                     \
               typename _KernelNameSpecialization2,                                                                     \
               typename _KernelNameSpecialization3>                                                                     \
+    class __name__##_sg_kernel;                                                                                        \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization1,                                                                     \
+              typename _KernelNameSpecialization2,                                                                     \
+              typename _KernelNameSpecialization3>                                                                     \
     class __name__##_broadcast_kernel;                                                                                 \
                                                                                                                        \
     template <typename _KernelNameSpecialization1,                                                                     \
@@ -874,45 +881,23 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                                                                                                                        \
         sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));                                                      \
                                                                                                                        \
-        DPNPC_ptr_adapter<_DataType_input1> input1_ptr(q_ref, input1_in, input1_size);                                 \
-        DPNPC_ptr_adapter<shape_elem_type> input1_shape_ptr(q_ref, input1_shape, input1_ndim, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> input1_strides_ptr(q_ref, input1_strides, input1_ndim, true);               \
-        DPNPC_ptr_adapter<_DataType_input2> input2_ptr(q_ref, input2_in, input2_size);                                 \
-        DPNPC_ptr_adapter<shape_elem_type> input2_shape_ptr(q_ref, input2_shape, input2_ndim, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> input2_strides_ptr(q_ref, input2_strides, input2_ndim, true);               \
-                                                                                                                       \
-        DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result_out, result_size, false, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> result_shape_ptr(q_ref, result_shape, result_ndim);                         \
-        DPNPC_ptr_adapter<shape_elem_type> result_strides_ptr(q_ref, result_strides, result_ndim);                     \
-                                                                                                                       \
-        _DataType_input1* input1_data = input1_ptr.get_ptr();                                                          \
-        shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr();                                               \
-        shape_elem_type* input1_strides_data = input1_strides_ptr.get_ptr();                                           \
+        _DataType_input1* input1_data = static_cast<_DataType_input1*>(const_cast<void*>(input1_in));                  \
+        _DataType_input2* input2_data = static_cast<_DataType_input2*>(const_cast<void*>(input2_in));                  \
+        _DataType_output* result = static_cast<_DataType_output*>(result_out);                                         \
                                                                                                                        \
-        _DataType_input2* input2_data = input2_ptr.get_ptr();                                                          \
-        shape_elem_type* input2_shape_data = input2_shape_ptr.get_ptr();                                               \
-        shape_elem_type* input2_strides_data = input2_strides_ptr.get_ptr();                                           \
+        bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim);                    \
                                                                                                                        \
-        _DataType_output* result = result_ptr.get_ptr();                                                               \
-        shape_elem_type* result_shape_data = result_shape_ptr.get_ptr();                                               \
-        shape_elem_type* result_strides_data = result_strides_ptr.get_ptr();                                           \
+        shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
                                                                                                                        \
-        bool use_broadcasting = !array_equal(input1_shape_data, input1_ndim, input2_shape_data, input2_ndim);          \
+        get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
+        bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);               \
+        delete[] input1_shape_offsets;                                                                                 \
                                                                                                                        \
-        const size_t input1_shape_size_in_bytes = input1_ndim * sizeof(shape_elem_type);                               \
-        shape_elem_type* input1_shape_offsets =                                                                        \
-            reinterpret_cast<shape_elem_type*>(sycl::malloc_shared(input1_shape_size_in_bytes, q));                    \
-        get_shape_offsets_inkernel(input1_shape_data, input1_ndim, input1_shape_offsets);                              \
-        bool use_strides = !array_equal(input1_strides_data, input1_ndim, input1_shape_offsets, input1_ndim);          \
-        sycl::free(input1_shape_offsets, q);                                                                           \
+        shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim];                                      \
                                                                                                                        \
-        const size_t input2_shape_size_in_bytes = input2_ndim * sizeof(shape_elem_type);                               \
-        shape_elem_type* input2_shape_offsets =                                                                        \
-            reinterpret_cast<shape_elem_type*>(sycl::malloc_shared(input2_shape_size_in_bytes, q));                    \
-        get_shape_offsets_inkernel(input2_shape_data, input2_ndim, input2_shape_offsets);                              \
-        use_strides =                                                                                                  \
-            use_strides || !array_equal(input2_strides_data, input2_ndim, input2_shape_offsets, input2_ndim);          \
-        sycl::free(input2_shape_offsets, q);                                                                           \
+        get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets);                                   \
+        use_strides = use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim);     \
+        delete[] input2_shape_offsets;                                                                                 \
                                                                                                                        \
         sycl::event event;                                                                                             \
         sycl::range<1> gws(result_size);                                                                               \
@@ -921,28 +906,26 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
         {                                                                                                              \
             DPNPC_id<_DataType_input1>* input1_it;                                                                     \
             const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>);                                 \
-            input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref,                       \
-                                                                                          input1_it_size_in_bytes));   \
-            new (input1_it)                                                                                            \
-                DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape_data, input1_strides_data, input1_ndim);   \
+            input1_it =                                                                                                \
+                reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes));    \
+            new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \
                                                                                                                        \
-            input1_it->broadcast_to_shape(result_shape_data, result_ndim);                                             \
+            input1_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
                                                                                                                        \
             DPNPC_id<_DataType_input2>* input2_it;                                                                     \
             const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>);                                 \
-            input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref,                       \
-                                                                                          input2_it_size_in_bytes));   \
-            new (input2_it)                                                                                            \
-                DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape_data, input2_strides_data, input2_ndim);   \
+            input2_it =                                                                                                \
+                reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes));    \
+            new (input2_it) DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \
                                                                                                                        \
-            input2_it->broadcast_to_shape(result_shape_data, result_ndim);                                             \
+            input2_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
                                                                                                                        \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                            \
+                const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                          \
                 {                                                                                                      \
                     const _DataType_output input1_elem = (*input1_it)[i];                                              \
                     const _DataType_output input2_elem = (*input2_it)[i];                                              \
-                    result[i] = __operation1__;                                                                        \
+                    result[i] = __operation__;                                                                         \
                 }                                                                                                      \
             };                                                                                                         \
             auto kernel_func = [&](sycl::handler& cgh) {                                                               \
@@ -951,8 +934,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                     gws, kernel_parallel_for_func);                                                                    \
             };                                                                                                         \
                                                                                                                        \
-            event = q.submit(kernel_func);                                                                             \
-            event.wait();                                                                                              \
+            q.submit(kernel_func).wait();                                                                              \
                                                                                                                        \
             input1_it->~DPNPC_id();                                                                                    \
             input2_it->~DPNPC_id();                                                                                    \
@@ -961,11 +943,41 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
         }                                                                                                              \
         else if (use_strides)                                                                                          \
         {                                                                                                              \
+            if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim))                                          \
+            {                                                                                                          \
+                throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +                                \
+                                         " mismatches with either input1 ndim=" + std::to_string(input1_ndim) +        \
+                                         " or input2 ndim=" + std::to_string(input2_ndim));                            \
+            }                                                                                                          \
+                                                                                                                       \
+            /* memory transfer optimization: a USM-host temporary speeds up the transfer to device */                  \
+            using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;                  \
+                                                                                                                       \
+            size_t strides_size = 3 * result_ndim;                                                                     \
+            shape_elem_type* dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);                 \
+                                                                                                                       \
+            /* create host temporary for packed strides as a vector backed by the USM-host allocator */                \
+            auto strides_host_packed =                                                                                 \
+                std::vector<shape_elem_type, usm_host_allocatorT>(strides_size, usm_host_allocatorT(q));               \
+                                                                                                                       \
+            /* packed vector is concatenation of result_strides, input1_strides and input2_strides */                  \
+            std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());                      \
+            std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim);        \
+            std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim);    \
+                                                                                                                       \
+            auto copy_strides_ev =                                                                                     \
+                q.copy<shape_elem_type>(strides_host_packed.data(), dev_strides_data, strides_host_packed.size());     \
+                                                                                                                       \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                const size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                    \
+                const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
+                    const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
+                                                                                                                       \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
+                                                                                                                       \
                     for (size_t i = 0; i < result_ndim; ++i)                                                           \
                     {                                                                                                  \
                         const size_t output_xyz_id =                                                                   \
@@ -976,34 +988,118 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                                                                                                                        \
                     const _DataType_output input1_elem = input1_data[input1_id];                                       \
                     const _DataType_output input2_elem = input2_data[input2_id];                                       \
-                    result[output_id] = __operation1__;                                                                \
+                    result[output_id] = __operation__;                                                                 \
                 }                                                                                                      \
             };                                                                                                         \
             auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.depends_on(copy_strides_ev);                                                                       \
                 cgh.parallel_for<                                                                                      \
                     class __name__##_strides_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(            \
                     gws, kernel_parallel_for_func);                                                                    \
             };                                                                                                         \
                                                                                                                        \
-            event = q.submit(kernel_func);                                                                             \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            sycl::free(dev_strides_data, q);                                                                           \
+            return event_ref;                                                                                          \
         }                                                                                                              \
         else                                                                                                           \
         {                                                                                                              \
-            if constexpr ((std::is_same<_DataType_input1, double>::value ||                                            \
-                           std::is_same<_DataType_input1, float>::value) &&                                            \
-                          std::is_same<_DataType_input2, _DataType_input1>::value)                                     \
+            if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __mkl_types__>)                      \
             {                                                                                                          \
-                event = __operation2__(q, result_size, input1_data, input2_data, result);                              \
+                event = __mkl_operation__(q, result_size, input1_data, input2_data, result);                           \
             }                                                                                                          \
-            else                                                                                                       \
+            else if constexpr (none_of_both_types<_DataType_input1,                                                    \
+                                                  _DataType_input2,                                                    \
+                                                  std::complex<float>,                                                 \
+                                                  std::complex<double>>)                                               \
             {                                                                                                          \
-                auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                           \
-                    const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                        \
+                constexpr size_t lws = 64;                                                                             \
+                constexpr unsigned int vec_sz = 8;                                                                     \
+                constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space;        \
+                                                                                                                       \
+                auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);            \
+                auto lws_range = sycl::range<1>(lws);                                                                  \
+                                                                                                                       \
+                auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {                                          \
+                    auto sg = nd_it.get_sub_group();                                                                   \
+                    const auto max_sg_size = sg.get_max_local_range()[0];                                              \
+                    const size_t start =                                                                               \
+                        vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); \
+                                                                                                                       \
+                    if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size)                               \
+                    {                                                                                                  \
+                        using input1_ptrT = sycl::multi_ptr<_DataType_input1, global_space>;                           \
+                        using input2_ptrT = sycl::multi_ptr<_DataType_input2, global_space>;                           \
+                        using result_ptrT = sycl::multi_ptr<_DataType_output, global_space>;                           \
+                                                                                                                       \
+                        sycl::vec<_DataType_output, vec_sz> res_vec;                                                   \
+                                                                                                                       \
+                        if constexpr (both_types_are_any_of<_DataType_input1, _DataType_input2, __vec_types__>)        \
+                        {                                                                                              \
+                            if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, _DataType_output>)   \
+                            {                                                                                          \
+                                sycl::vec<_DataType_input1, vec_sz> x1 =                                               \
+                                    sg.load<vec_sz>(input1_ptrT(&input1_data[start]));                                 \
+                                sycl::vec<_DataType_input2, vec_sz> x2 =                                               \
+                                    sg.load<vec_sz>(input2_ptrT(&input2_data[start]));                                 \
+                                                                                                                       \
+                                res_vec = __vec_operation__;                                                           \
+                            }                                                                                          \
+                            else /* input types don't match result type, so explicit casting is required */            \
+                            {                                                                                          \
+                                sycl::vec<_DataType_output, vec_sz> x1 =                                               \
+                                    dpnp_vec_cast<_DataType_output, _DataType_input1, vec_sz>(                         \
+                                        sg.load<vec_sz>(input1_ptrT(&input1_data[start])));                            \
+                                sycl::vec<_DataType_output, vec_sz> x2 =                                               \
+                                    dpnp_vec_cast<_DataType_output, _DataType_input2, vec_sz>(                         \
+                                        sg.load<vec_sz>(input2_ptrT(&input2_data[start])));                            \
+                                                                                                                       \
+                                res_vec = __vec_operation__;                                                           \
+                            }                                                                                          \
+                        }                                                                                              \
+                        else                                                                                           \
+                        {                                                                                              \
+                            sycl::vec<_DataType_input1, vec_sz> x1 =                                                   \
+                                sg.load<vec_sz>(input1_ptrT(&input1_data[start]));                                     \
+                            sycl::vec<_DataType_input2, vec_sz> x2 =                                                   \
+                                sg.load<vec_sz>(input2_ptrT(&input2_data[start]));                                     \
+                                                                                                                       \
+                            for (size_t k = 0; k < vec_sz; ++k)                                                        \
+                            {                                                                                          \
+                                const _DataType_output input1_elem = x1[k];                                            \
+                                const _DataType_output input2_elem = x2[k];                                            \
+                                res_vec[k] = __operation__;                                                            \
+                            }                                                                                          \
+                        }                                                                                              \
+                        sg.store<vec_sz>(result_ptrT(&result[start]), res_vec);                                        \
+                    }                                                                                                  \
+                    else                                                                                               \
                     {                                                                                                  \
-                        const _DataType_output input1_elem = input1_data[i];                                           \
-                        const _DataType_output input2_elem = input2_data[i];                                           \
-                        result[i] = __operation1__;                                                                    \
+                        for (size_t k = start + sg.get_local_id()[0]; k < result_size; k += max_sg_size)               \
+                        {                                                                                              \
+                            const _DataType_output input1_elem = input1_data[k];                                       \
+                            const _DataType_output input2_elem = input2_data[k];                                       \
+                            result[k] = __operation__;                                                                 \
+                        }                                                                                              \
                     }                                                                                                  \
+                };                                                                                                     \
+                                                                                                                       \
+                auto kernel_func = [&](sycl::handler& cgh) {                                                           \
+                    cgh.parallel_for<                                                                                  \
+                        class __name__##_sg_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(             \
+                        sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func);                            \
+                };                                                                                                     \
+                event = q.submit(kernel_func);                                                                         \
+            }                                                                                                          \
+            else /* either input1 or input2 has complex type */                                                        \
+            {                                                                                                          \
+                auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                           \
+                    const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                      \
+                                                                                                                       \
+                    const _DataType_output input1_elem = input1_data[i];                                               \
+                    const _DataType_output input2_elem = input2_data[i];                                               \
+                    result[i] = __operation__;                                                                         \
                 };                                                                                                     \
                 auto kernel_func = [&](sycl::handler& cgh) {                                                           \
                     cgh.parallel_for<class __name__##_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(   \
@@ -1013,18 +1109,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
             }                                                                                                          \
         }                                                                                                              \
                                                                                                                        \
-        input1_ptr.depends_on(event);                                                                                  \
-        input1_shape_ptr.depends_on(event);                                                                            \
-        input1_strides_ptr.depends_on(event);                                                                          \
-        input2_ptr.depends_on(event);                                                                                  \
-        input2_shape_ptr.depends_on(event);                                                                            \
-        input2_strides_ptr.depends_on(event);                                                                          \
-        result_ptr.depends_on(event);                                                                                  \
-        result_shape_ptr.depends_on(event);                                                                            \
-        result_strides_ptr.depends_on(event);                                                                          \
-                                                                                                                       \
         event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
-                                                                                                                       \
         return DPCTLEvent_Copy(event_ref);                                                                             \
     }                                                                                                                  \
                                                                                                                        \
@@ -1048,26 +1133,25 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     {                                                                                                                  \
         DPCTLSyclQueueRef q_ref = reinterpret_cast<DPCTLSyclQueueRef>(&DPNP_QUEUE);                                    \
         DPCTLEventVectorRef dep_event_vec_ref = nullptr;                                                               \
-        DPCTLSyclEventRef event_ref = __name__<_DataType_output, _DataType_input1, _DataType_input2>(                  \
-            q_ref,                                                                                                     \
-            result_out,                                                                                                \
-            result_size,                                                                                               \
-            result_ndim,                                                                                               \
-            result_shape,                                                                                              \
-            result_strides,                                                                                            \
-            input1_in,                                                                                                 \
-            input1_size,                                                                                               \
-            input1_ndim,                                                                                               \
-            input1_shape,                                                                                              \
-            input1_strides,                                                                                            \
-            input2_in,                                                                                                 \
-            input2_size,                                                                                               \
-            input2_ndim,                                                                                               \
-            input2_shape,                                                                                              \
-            input2_strides,                                                                                            \
-            where,                                                                                                     \
-            dep_event_vec_ref                                                                                          \
-        );                                                                                                             \
+        DPCTLSyclEventRef event_ref =                                                                                  \
+            __name__<_DataType_output, _DataType_input1, _DataType_input2>(q_ref,                                      \
+                                                                           result_out,                                 \
+                                                                           result_size,                                \
+                                                                           result_ndim,                                \
+                                                                           result_shape,                               \
+                                                                           result_strides,                             \
+                                                                           input1_in,                                  \
+                                                                           input1_size,                                \
+                                                                           input1_ndim,                                \
+                                                                           input1_shape,                               \
+                                                                           input1_strides,                             \
+                                                                           input2_in,                                  \
+                                                                           input2_size,                                \
+                                                                           input2_ndim,                                \
+                                                                           input2_shape,                               \
+                                                                           input2_strides,                             \
+                                                                           where,                                      \
+                                                                           dep_event_vec_ref);                         \
         DPCTLEvent_WaitAndThrow(event_ref);                                                                            \
         DPCTLEvent_Delete(event_ref);                                                                                  \
     }                                                                                                                  \
@@ -1108,12 +1192,91 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                                         const shape_elem_type*,                                                        \
                                         const shape_elem_type*,                                                        \
                                         const size_t*,                                                                 \
-                                        const DPCTLEventVectorRef) = __name__<_DataType_output,                        \
-                                                                              _DataType_input1,                        \
-                                                                              _DataType_input2>;
+                                        const DPCTLEventVectorRef) =                                                   \
+        __name__<_DataType_output, _DataType_input1, _DataType_input2>;
 
 #include <dpnp_gen_2arg_3type_tbl.hpp>
 
+// Selects the result DPNPFuncType of a division for the operand type pair <FT1, FT2>.
+// `promoted` is whatever populate_func_types<FT1, FT2>() yields for the pair and `other` is
+// the remaining operand type (FT2 when `promoted` equals FT1, otherwise FT1).
+// has_fp64 must expose a boolean ::value; it defaults to std::true_type.
+// NOTE(review): presumably has_fp64 mirrors double-precision support of the device - confirm.
+//
+// Mapping implemented below:
+//   promoted is CMPLX128 or DOUBLE -> promoted
+//   promoted is CMPLX64            -> CMPLX128 if other is DOUBLE, or other is INT/LONG and
+//                                     has_fp64::value is true; otherwise CMPLX64
+//   promoted is FLOAT              -> DOUBLE if other is INT/LONG and has_fp64::value is true;
+//                                     otherwise FLOAT
+//   anything else                  -> DOUBLE if has_fp64::value is true, otherwise FLOAT
+template <DPNPFuncType FT1, DPNPFuncType FT2, typename has_fp64 = std::true_type>
+static constexpr DPNPFuncType get_divide_res_type()
+{
+    using FT = DPNPFuncType;
+
+    constexpr auto promoted = populate_func_types<FT1, FT2>();
+    constexpr auto other = (promoted == FT1) ? FT2 : FT1;
+    constexpr bool other_is_integer = (other == FT::DPNP_FT_INT) || (other == FT::DPNP_FT_LONG);
+    constexpr bool widen_for_integer = has_fp64::value && other_is_integer;
+
+    if constexpr (promoted == FT::DPNP_FT_CMPLX128 || promoted == FT::DPNP_FT_DOUBLE)
+    {
+        return promoted;
+    }
+    else if constexpr (promoted == FT::DPNP_FT_CMPLX64)
+    {
+        return (other == FT::DPNP_FT_DOUBLE || widen_for_integer) ? FT::DPNP_FT_CMPLX128 : promoted;
+    }
+    else if constexpr (promoted == FT::DPNP_FT_FLOAT)
+    {
+        return widen_for_integer ? FT::DPNP_FT_DOUBLE : promoted;
+    }
+    else
+    {
+        return has_fp64::value ? FT::DPNP_FT_DOUBLE : FT::DPNP_FT_FLOAT;
+    }
+}
+
+template <DPNPFuncType FT1, DPNPFuncType... FTs>
+static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) // registers fmap[fn][FT1][FT] for every FT in FTs
+{
+    ((fmap[DPNPFuncName::DPNP_FN_ADD_EXT][FT1][FTs] = // comma fold: one assignment per element of FTs
+          {populate_func_types<FT1, FTs>(), // type value stored next to the kernel pointer
+           (void*)dpnp_add_c_ext<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
+                                 func_type_map_t::find_type<FT1>,
+                                 func_type_map_t::find_type<FTs>>}), // NOTE(review): presumably <output, in1, in2>
+     ...);
+    ((fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][FT1][FTs] = // same entry shape as ADD_EXT
+          {populate_func_types<FT1, FTs>(),
+           (void*)dpnp_multiply_c_ext<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
+                                      func_type_map_t::find_type<FT1>,
+                                      func_type_map_t::find_type<FTs>>}),
+     ...);
+    ((fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][FT1][FTs] = // same entry shape as ADD_EXT
+          {populate_func_types<FT1, FTs>(),
+           (void*)dpnp_subtract_c_ext<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
+                                      func_type_map_t::find_type<FT1>,
+                                      func_type_map_t::find_type<FTs>>}),
+     ...);
+    ((fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][FT1][FTs] = // four initializers: two (type, pointer) pairs
+          {get_divide_res_type<FT1, FTs>(), // first pair: result type with the default has_fp64
+           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs>()>,
+                                    func_type_map_t::find_type<FT1>,
+                                    func_type_map_t::find_type<FTs>>,
+           get_divide_res_type<FT1, FTs, std::false_type>(), // second pair: has_fp64 = std::false_type
+           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs, std::false_type>()>,
+                                    func_type_map_t::find_type<FT1>,
+                                    func_type_map_t::find_type<FTs>>}),
+     ...);
+}
+
+template <DPNPFuncType... FTs>
+static void func_map_elemwise_2arg_3type_helper(func_map_t& fmap) // calls the _core registration once per FTs element
+{
+    ((func_map_elemwise_2arg_3type_core<FTs, FTs...>(fmap)), ...); // each element as FT1, paired with the full list
+}
+
 static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
 {
     fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = {eft_INT,
@@ -1149,39 +1312,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_ADD][eft_DBL][eft_DBL] = {eft_DBL,
                                                          (void*)dpnp_add_c_default<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_INT] = {eft_INT,
-                                                             (void*)dpnp_add_c_ext<int32_t, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_LNG] = {eft_LNG,
-                                                             (void*)dpnp_add_c_ext<int64_t, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_FLT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_DBL] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_INT] = {eft_LNG,
-                                                             (void*)dpnp_add_c_ext<int64_t, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_LNG] = {eft_LNG,
-                                                             (void*)dpnp_add_c_ext<int64_t, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_FLT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_DBL] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_INT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_LNG] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_FLT] = {eft_FLT,
-                                                             (void*)dpnp_add_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_DBL] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_INT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_LNG] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_FLT] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_DBL] = {eft_DBL,
-                                                             (void*)dpnp_add_c_ext<double, double, double>};
-
     fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_INT] = {eft_DBL,
                                                              (void*)dpnp_arctan2_c_default<double, int32_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_LNG] = {eft_DBL,
@@ -1347,39 +1477,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_DIVIDE][eft_DBL][eft_DBL] = {eft_DBL,
                                                             (void*)dpnp_divide_c_default<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_FLT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_FLT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_FLT] = {eft_FLT,
-                                                                (void*)dpnp_divide_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_FLT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, double>};
-
     fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_INT] = {eft_INT,
                                                           (void*)dpnp_fmod_c_default<int32_t, int32_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_LNG] = {eft_LNG,
@@ -1725,111 +1822,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_C128] = {
         eft_C128, (void*)dpnp_multiply_c_default<std::complex<double>, std::complex<double>, std::complex<double>>};
 
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_BLN] = {
-        eft_BLN, (void*)dpnp_multiply_c_ext<bool, bool, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_INT] = {
-        eft_INT, (void*)dpnp_multiply_c_ext<int32_t, bool, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_LNG] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, bool, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_FLT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, bool, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_DBL] = {
-        eft_DBL,  (void*)dpnp_multiply_c_ext<double, bool, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, bool, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, bool, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_BLN] = {
-        eft_INT, (void*)dpnp_multiply_c_ext<int32_t, int32_t, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_INT] = {
-        eft_INT, (void*)dpnp_multiply_c_ext<int32_t, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_LNG] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_FLT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_DBL] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, int32_t, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, int32_t, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_BLN] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, int64_t, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_INT] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_LNG] = {
-        eft_LNG, (void*)dpnp_multiply_c_ext<int64_t, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_FLT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_DBL] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, int64_t, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, int64_t, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_BLN] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, float, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_INT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_LNG] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_FLT] = {
-        eft_FLT, (void*)dpnp_multiply_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_DBL] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, float, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, float, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_BLN] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_INT] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_LNG] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_FLT] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_DBL] = {
-        eft_DBL, (void*)dpnp_multiply_c_ext<double, double, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, double, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C128] = {
-        eft_C128,  (void*)dpnp_multiply_c_ext<std::complex<double>, double, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_BLN] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_INT] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_LNG] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_FLT] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_DBL] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C64] = {
-        eft_C64, (void*)dpnp_multiply_c_ext<std::complex<float>, std::complex<float>, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C128] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<float>, std::complex<double>>};
-
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_BLN] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, bool>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_INT] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_LNG] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_FLT] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, float>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_DBL] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, double>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C64] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, std::complex<float>>};
-    fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C128] = {
-        eft_C128, (void*)dpnp_multiply_c_ext<std::complex<double>, std::complex<double>, std::complex<double>>};
-
     fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_INT] = {eft_INT,
                                                            (void*)dpnp_power_c_default<int32_t, int32_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_LNG] = {eft_LNG,
@@ -1929,38 +1921,7 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_SUBTRACT][eft_DBL][eft_DBL] = {
         eft_DBL, (void*)dpnp_subtract_c_default<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_INT] = {
-        eft_INT, (void*)dpnp_subtract_c_ext<int32_t, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_LNG] = {
-        eft_LNG, (void*)dpnp_subtract_c_ext<int64_t, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_FLT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_DBL] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_INT] = {
-        eft_LNG, (void*)dpnp_subtract_c_ext<int64_t, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_LNG] = {
-        eft_LNG, (void*)dpnp_subtract_c_ext<int64_t, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_FLT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_DBL] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_INT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_LNG] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_FLT] = {
-        eft_FLT, (void*)dpnp_subtract_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_DBL] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_INT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_LNG] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_FLT] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_DBL] = {
-        eft_DBL, (void*)dpnp_subtract_c_ext<double, double, double>};
+    func_map_elemwise_2arg_3type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL, eft_C64, eft_C128>(fmap);
 
     return;
 }
diff --git a/dpnp/backend/kernels/dpnp_krnl_fft.cpp b/dpnp/backend/kernels/dpnp_krnl_fft.cpp
index 3d39f2f373c7..b3f9716d73f1 100644
--- a/dpnp/backend/kernels/dpnp_krnl_fft.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_fft.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -182,7 +182,10 @@ static void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref,
                                                   size_t inverse,
                                                   const size_t norm)
 {
+    // avoid warning unused variable
     (void)result_shape;
+    (void)input_size;
+    (void)result_size;
 
     if (!shape_size) {
         return;
@@ -253,6 +256,9 @@ static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef
                                                               const size_t norm,
                                                               const size_t real)
 {
+    // avoid warning unused variable
+    (void)input_size;
+
     DPCTLSyclEventRef event_ref = nullptr;
     if (!shape_size) {
         return event_ref;
diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
index 5cde013b69f8..0b80ac678d34 100644
--- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -896,6 +896,7 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref,
                               const DPCTLEventVectorRef dep_event_vec_ref)
 {
     // avoid warning unused variable
+    (void)array1_size;
     (void)dep_event_vec_ref;
 
     DPCTLSyclEventRef event_ref = nullptr;
diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
index 109246913589..157347aa90c0 100644
--- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,7 @@
 
 #include "dpnp_fptr.hpp"
 #include "dpnp_iface.hpp"
+#include "dpnp_iterator.hpp"
 #include "dpnpc_memory_adapter.hpp"
 #include "queue_sycl.hpp"
 
@@ -286,6 +287,457 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
                                     const size_t,
                                     const DPCTLEventVectorRef) = dpnp_any_c<_DataType, _ResultType>;
 
+
+#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__)                                                             \
+    template <typename _KernelNameSpecialization>                                                                      \
+    class __name__##_kernel;                                                                                           \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization>                                                                      \
+    class __name__##_broadcast_kernel;                                                                                 \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization>                                                                      \
+    class __name__##_strides_kernel;                                                                                   \
+                                                                                                                       \
+    template <typename _DataType_input1>                                                                               \
+    DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                \
+                               void* result_out,                                                                       \
+                               const size_t result_size,                                                               \
+                               const size_t result_ndim,                                                               \
+                               const shape_elem_type* result_shape,                                                    \
+                               const shape_elem_type* result_strides,                                                  \
+                               const void* input1_in,                                                                  \
+                               const size_t input1_size,                                                               \
+                               const size_t input1_ndim,                                                               \
+                               const shape_elem_type* input1_shape,                                                    \
+                               const shape_elem_type* input1_strides,                                                  \
+                               const size_t* where,                                                                    \
+                               const DPCTLEventVectorRef dep_event_vec_ref)                                            \
+    {                                                                                                                  \
+        /* avoid warning unused variable*/                                                                             \
+        (result_shape);                                                                                                \
+        (void)where;                                                                                                   \
+        (void)dep_event_vec_ref;                                                                                       \
+                                                                                                                       \
+        DPCTLSyclEventRef event_ref = nullptr;                                                                         \
+                                                                                                                       \
+        if (!input1_size)                                                                                              \
+        {                                                                                                              \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+                                                                                                                       \
+        sycl::queue q = *(reinterpret_cast<sycl::queue *>(q_ref));                                                     \
+                                                                                                                       \
+        _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast<void *>(input1_in));                \
+        bool* result = static_cast<bool *>(result_out);                                                                \
+                                                                                                                       \
+        shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
+                                                                                                                       \
+        get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
+        bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);               \
+        delete[] input1_shape_offsets;                                                                                 \
+                                                                                                                       \
+        if (use_strides)                                                                                               \
+        {                                                                                                              \
+            if (result_ndim != input1_ndim)                                                                            \
+            {                                                                                                          \
+                throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +                                \
+                                         " mismatches with input1 ndim=" + std::to_string(input1_ndim));               \
+            }                                                                                                          \
+                                                                                                                       \
+            /* memory transfer optimization, use USM-host for temporary speeds up transfer to device */                \
+            using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;                  \
+                                                                                                                       \
+            size_t strides_size = 2 * result_ndim;                                                                     \
+            shape_elem_type *dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);                 \
+                                                                                                                       \
+            /* create host temporary (USM-host backed std::vector) for packed strides */                               \
+            auto strides_host_packed = std::vector<shape_elem_type, usm_host_allocatorT>(strides_size,                 \
+                                                                                         usm_host_allocatorT(q));      \
+                                                                                                                       \
+            /* packed vector is concatenation of result_strides and input1_strides */                                  \
+            std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());                      \
+            std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim);        \
+                                                                                                                       \
+            auto copy_strides_ev = q.copy<shape_elem_type>(strides_host_packed.data(),                                 \
+                                                           dev_strides_data,                                           \
+                                                           strides_host_packed.size());                                \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
+                const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
+                {                                                                                                      \
+                    const shape_elem_type *result_strides_data = &dev_strides_data[0];                                 \
+                    const shape_elem_type *input1_strides_data = &dev_strides_data[1];                                 \
+                                                                                                                       \
+                    size_t input1_id = 0;                                                                              \
+                                                                                                                       \
+                    for (size_t i = 0; i < result_ndim; ++i)                                                           \
+                    {                                                                                                  \
+                        const size_t output_xyz_id =                                                                   \
+                            get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i);                 \
+                        input1_id += output_xyz_id * input1_strides_data[i];                                           \
+                    }                                                                                                  \
+                                                                                                                       \
+                    const _DataType_input1 input1_elem = input1_data[input1_id];                                       \
+                    result[output_id] = __operation__;                                                                 \
+                }                                                                                                      \
+            };                                                                                                         \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.depends_on(copy_strides_ev);                                                                       \
+                cgh.parallel_for<class __name__##_strides_kernel<_DataType_input1>>(                                   \
+                    sycl::range<1>(result_size), kernel_parallel_for_func);                                            \
+            };                                                                                                         \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            sycl::free(dev_strides_data, q);                                                                           \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+        else                                                                                                           \
+        {                                                                                                              \
+            constexpr size_t lws = 64;                                                                                 \
+            constexpr unsigned int vec_sz = 8;                                                                         \
+            constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space;            \
+                                                                                                                       \
+            auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);                \
+            auto lws_range = sycl::range<1>(lws);                                                                      \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {                                              \
+                auto sg = nd_it.get_sub_group();                                                                       \
+                const auto max_sg_size = sg.get_max_local_range()[0];                                                  \
+                const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) +                         \
+                                               sg.get_group_id()[0] * max_sg_size);                                    \
+                                                                                                                       \
+                if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size) {                                 \
+                    sycl::vec<_DataType_input1, vec_sz> x1 =                                                           \
+                        sg.load<vec_sz>(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start]));         \
+                    sycl::vec<bool, vec_sz> res_vec;                                                                   \
+                                                                                                                       \
+                    for (size_t k = 0; k < vec_sz; ++k) {                                                              \
+                        const _DataType_input1 input1_elem = x1[k];                                                    \
+                        res_vec[k] = __operation__;                                                                    \
+                    }                                                                                                  \
+                    sg.store<vec_sz>(sycl::multi_ptr<bool, global_space>(&result[start]), res_vec);                    \
+                                                                                                                       \
+                }                                                                                                      \
+                else {                                                                                                 \
+                    for (size_t k = start; k < result_size; ++k) {                                                     \
+                        const _DataType_input1 input1_elem = input1_data[k];                                           \
+                        result[k] = __operation__;                                                                     \
+                    }                                                                                                  \
+                }                                                                                                      \
+            };                                                                                                         \
+                                                                                                                       \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.parallel_for<class __name__##_kernel<_DataType_input1>>(                                           \
+                    sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func);                                \
+            };                                                                                                         \
+            sycl::event event = q.submit(kernel_func);                                                                 \
+                                                                                                                       \
+            event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                   \
+            return DPCTLEvent_Copy(event_ref);                                                                         \
+        }                                                                                                              \
+        return event_ref;                                                                                              \
+    }                                                                                                                  \
+                                                                                                                       \
+    template <typename _DataType_input1>                                                                               \
+    DPCTLSyclEventRef (*__name__##_ext)(DPCTLSyclQueueRef,                                                             \
+                                        void*,                                                                         \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const void*,                                                                   \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const size_t*,                                                                 \
+                                        const DPCTLEventVectorRef) = __name__<_DataType_input1>;
+
+#include <dpnp_gen_1arg_1type_tbl.hpp>
+
+/**
+ * Fills the function map with the extended logical-not entry points.
+ *
+ * For each type tag FT in the FTs pack, the slot
+ * fmap[DPNPFuncName::DPNP_FN_LOGICAL_NOT_EXT][FT][FT] is assigned the braced
+ * pair {eft_BLN, pointer}, where the pointer is the
+ * dpnp_logical_not_c_ext instantiation for the C++ type that
+ * func_type_map_t::find_type yields for FT, erased to void*.
+ *
+ * NOTE(review): eft_BLN presumably tags the result as boolean - confirm
+ * against the DPNPFuncType declaration.
+ *
+ * @tparam FTs   Type tags to register; an empty pack registers nothing.
+ * @param  fmap  Function map that receives the entries.
+ */
+template <DPNPFuncType... FTs>
+static void func_map_logic_1arg_1type_helper(func_map_t& fmap)
+{
+    /* unary left fold over the comma operator: one assignment per tag, in pack order */
+    (...,
+     (fmap[DPNPFuncName::DPNP_FN_LOGICAL_NOT_EXT][FTs][FTs] = {
+          eft_BLN,
+          reinterpret_cast<void*>(dpnp_logical_not_c_ext<func_type_map_t::find_type<FTs>>)}));
+}
+
+
+#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__)                                                            \
+    template <typename _KernelNameSpecialization1,                                                                     \
+              typename _KernelNameSpecialization2>                                                                     \
+    class __name__##_kernel;                                                                                           \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization1,                                                                     \
+              typename _KernelNameSpecialization2>                                                                     \
+    class __name__##_broadcast_kernel;                                                                                 \
+                                                                                                                       \
+    template <typename _KernelNameSpecialization1,                                                                     \
+              typename _KernelNameSpecialization2>                                                                     \
+    class __name__##_strides_kernel;                                                                                   \
+                                                                                                                       \
+    template <typename _DataType_input1, typename _DataType_input2>                                                    \
+    DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                \
+                               void* result_out,                                                                       \
+                               const size_t result_size,                                                               \
+                               const size_t result_ndim,                                                               \
+                               const shape_elem_type* result_shape,                                                    \
+                               const shape_elem_type* result_strides,                                                  \
+                               const void* input1_in,                                                                  \
+                               const size_t input1_size,                                                               \
+                               const size_t input1_ndim,                                                               \
+                               const shape_elem_type* input1_shape,                                                    \
+                               const shape_elem_type* input1_strides,                                                  \
+                               const void* input2_in,                                                                  \
+                               const size_t input2_size,                                                               \
+                               const size_t input2_ndim,                                                               \
+                               const shape_elem_type* input2_shape,                                                    \
+                               const shape_elem_type* input2_strides,                                                  \
+                               const size_t* where,                                                                    \
+                               const DPCTLEventVectorRef dep_event_vec_ref)                                            \
+    {                                                                                                                  \
+        /* avoid warning unused variable*/                                                                             \
+        (void)where;                                                                                                   \
+        (void)dep_event_vec_ref;                                                                                       \
+                                                                                                                       \
+        DPCTLSyclEventRef event_ref = nullptr;                                                                         \
+                                                                                                                       \
+        if (!input1_size || !input2_size)                                                                              \
+        {                                                                                                              \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+                                                                                                                       \
+        sycl::queue q = *(reinterpret_cast<sycl::queue *>(q_ref));                                                     \
+                                                                                                                       \
+        _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast<void *>(input1_in));                \
+        _DataType_input2* input2_data = static_cast<_DataType_input2 *>(const_cast<void *>(input2_in));                \
+        bool* result = static_cast<bool *>(result_out);                                                                \
+                                                                                                                       \
+        bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim);                    \
+                                                                                                                       \
+        shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
+                                                                                                                       \
+        get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
+        bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);               \
+        delete[] input1_shape_offsets;                                                                                 \
+                                                                                                                       \
+        shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim];                                      \
+                                                                                                                       \
+        get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets);                                   \
+        use_strides =                                                                                                  \
+            use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim);               \
+        delete[] input2_shape_offsets;                                                                                 \
+                                                                                                                       \
+        sycl::event event;                                                                                             \
+        sycl::range<1> gws(result_size); /* used only when use_broadcasting or use_strides is true */                  \
+                                                                                                                       \
+        if (use_broadcasting)                                                                                          \
+        {                                                                                                              \
+            DPNPC_id<_DataType_input1>* input1_it;                                                                     \
+            const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>);                                 \
+            input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref,                       \
+                                                                                          input1_it_size_in_bytes));   \
+            new (input1_it)                                                                                            \
+                DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim);             \
+                                                                                                                       \
+            input1_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
+                                                                                                                       \
+            DPNPC_id<_DataType_input2>* input2_it;                                                                     \
+            const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>);                                 \
+            input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref,                       \
+                                                                                          input2_it_size_in_bytes));   \
+            new (input2_it)                                                                                            \
+                DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim);             \
+                                                                                                                       \
+            input2_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
+                const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                          \
+                {                                                                                                      \
+                    const _DataType_input1 input1_elem = (*input1_it)[i];                                              \
+                    const _DataType_input2 input2_elem = (*input2_it)[i];                                              \
+                    result[i] = __operation__;                                                                         \
+                }                                                                                                      \
+            };                                                                                                         \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.parallel_for<                                                                                      \
+                    class __name__##_broadcast_kernel<_DataType_input1, _DataType_input2>>(                            \
+                    gws, kernel_parallel_for_func);                                                                    \
+            };                                                                                                         \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            input1_it->~DPNPC_id();                                                                                    \
+            input2_it->~DPNPC_id();                                                                                    \
+                                                                                                                       \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+        else if (use_strides)                                                                                          \
+        {                                                                                                              \
+            if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim))                                          \
+            {                                                                                                          \
+                throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +                                \
+                                         " mismatches with either input1 ndim=" + std::to_string(input1_ndim) +        \
+                                         " or input2 ndim=" + std::to_string(input2_ndim));                            \
+            }                                                                                                          \
+                                                                                                                       \
+            /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */                 \
+            using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;                  \
+                                                                                                                       \
+            size_t strides_size = 3 * result_ndim;                                                                     \
+            shape_elem_type *dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);                 \
+                                                                                                                       \
+            /* create host temporary for packed strides managed by shared pointer */                                   \
+            auto strides_host_packed = std::vector<shape_elem_type, usm_host_allocatorT>(strides_size,                 \
+                                                                                         usm_host_allocatorT(q));      \
+                                                                                                                       \
+            /* packed vector is concatenation of result_strides, input1_strides and input2_strides */                  \
+            std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());                      \
+            std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim);        \
+            std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim);    \
+                                                                                                                       \
+            auto copy_strides_ev = q.copy<shape_elem_type>(strides_host_packed.data(),                                 \
+                                                           dev_strides_data,                                           \
+                                                           strides_host_packed.size());                                \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
+                const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
+                {                                                                                                      \
+                    const shape_elem_type *result_strides_data = &dev_strides_data[0];                                 \
+                    const shape_elem_type *input1_strides_data = &dev_strides_data[1];                                 \
+                    const shape_elem_type *input2_strides_data = &dev_strides_data[2];                                 \
+                                                                                                                       \
+                    size_t input1_id = 0;                                                                              \
+                    size_t input2_id = 0;                                                                              \
+                                                                                                                       \
+                    for (size_t i = 0; i < result_ndim; ++i)                                                           \
+                    {                                                                                                  \
+                        const size_t output_xyz_id =                                                                   \
+                            get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i);                 \
+                        input1_id += output_xyz_id * input1_strides_data[i];                                           \
+                        input2_id += output_xyz_id * input2_strides_data[i];                                           \
+                    }                                                                                                  \
+                                                                                                                       \
+                    const _DataType_input1 input1_elem = input1_data[input1_id];                                       \
+                    const _DataType_input2 input2_elem = input2_data[input2_id];                                       \
+                    result[output_id] = __operation__;                                                                 \
+                }                                                                                                      \
+            };                                                                                                         \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.depends_on(copy_strides_ev);                                                                       \
+                cgh.parallel_for<                                                                                      \
+                    class __name__##_strides_kernel<_DataType_input1, _DataType_input2>>(                              \
+                    gws, kernel_parallel_for_func);                                                                    \
+            };                                                                                                         \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            sycl::free(dev_strides_data, q);                                                                           \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+        else                                                                                                           \
+        {                                                                                                              \
+            constexpr size_t lws = 64;                                                                                 \
+            constexpr unsigned int vec_sz = 8;                                                                         \
+            constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space;            \
+                                                                                                                       \
+            auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);                \
+            auto lws_range = sycl::range<1>(lws);                                                                      \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {                                              \
+                auto sg = nd_it.get_sub_group();                                                                       \
+                const auto max_sg_size = sg.get_max_local_range()[0];                                                  \
+                const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) +                         \
+                                               sg.get_group_id()[0] * max_sg_size);                                    \
+                                                                                                                       \
+                if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size) {                                 \
+                    sycl::vec<_DataType_input1, vec_sz> x1 =                                                           \
+                        sg.load<vec_sz>(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start]));         \
+                    sycl::vec<_DataType_input2, vec_sz> x2 =                                                           \
+                        sg.load<vec_sz>(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start]));         \
+                    sycl::vec<bool, vec_sz> res_vec;                                                                   \
+                                                                                                                       \
+                    for (size_t k = 0; k < vec_sz; ++k) {                                                              \
+                        const _DataType_input1 input1_elem = x1[k];                                                    \
+                        const _DataType_input2 input2_elem = x2[k];                                                    \
+                        res_vec[k] = __operation__;                                                                    \
+                    }                                                                                                  \
+                    sg.store<vec_sz>(sycl::multi_ptr<bool, global_space>(&result[start]), res_vec);                    \
+                                                                                                                       \
+                }                                                                                                      \
+                else {                                                                                                 \
+                    for (size_t k = start; k < result_size; ++k) {                                                     \
+                        const _DataType_input1 input1_elem = input1_data[k];                                           \
+                        const _DataType_input2 input2_elem = input2_data[k];                                           \
+                        result[k] = __operation__;                                                                     \
+                    }                                                                                                  \
+                }                                                                                                      \
+            };                                                                                                         \
+                                                                                                                       \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.parallel_for<class __name__##_kernel<_DataType_input1, _DataType_input2>>(                         \
+                    sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func);                                \
+            };                                                                                                         \
+            event = q.submit(kernel_func);                                                                             \
+        }                                                                                                              \
+                                                                                                                       \
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
+        return DPCTLEvent_Copy(event_ref);                                                                             \
+    }                                                                                                                  \
+                                                                                                                       \
+    template <typename _DataType_input1, typename _DataType_input2>                                                    \
+    DPCTLSyclEventRef (*__name__##_ext)(DPCTLSyclQueueRef,                                                             \
+                                        void*,                                                                         \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const void*,                                                                   \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const void*,                                                                   \
+                                        const size_t,                                                                  \
+                                        const size_t,                                                                  \
+                                        const shape_elem_type*,                                                        \
+                                        const shape_elem_type*,                                                        \
+                                        const size_t*,                                                                 \
+                                        const DPCTLEventVectorRef) = __name__<_DataType_input1,                        \
+                                                                              _DataType_input2>;
+
+#include <dpnp_gen_2arg_2type_tbl.hpp>
+
+template <DPNPFuncType FT1, DPNPFuncType ... FTs> // FT1: first input type; FTs: second input types paired with FT1
+static void func_map_logic_2arg_2type_core(func_map_t& fmap)
+{ // each fold below assigns one table entry per FTs element; every entry declares eft_BLN as its result type
+    ((fmap[DPNPFuncName::DPNP_FN_EQUAL_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_equal_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_GREATER_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_greater_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_GREATER_EQUAL_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_greater_equal_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LESS_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_less_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LESS_EQUAL_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_less_equal_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_AND_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_logical_and_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_OR_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_logical_or_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_XOR_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_logical_xor_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+    ((fmap[DPNPFuncName::DPNP_FN_NOT_EQUAL_EXT][FT1][FTs] =
+        {eft_BLN, (void*)dpnp_not_equal_c_ext<func_type_map_t::find_type<FT1>, func_type_map_t::find_type<FTs>>}), ...);
+}
+
+template <DPNPFuncType ... FTs> // FTs: the input types to combine with each other
+static void func_map_logic_2arg_2type_helper(func_map_t& fmap)
+{
+    ((func_map_logic_2arg_2type_core<FTs, FTs...>(fmap)), ...); // one core call per type, each paired with the full list
+}
+
 void func_map_init_logic(func_map_t& fmap)
 {
     fmap[DPNPFuncName::DPNP_FN_ALL][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_all_default_c<bool, bool>};
@@ -378,5 +830,8 @@ void func_map_init_logic(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_any_ext_c<float, bool>};
     fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_any_ext_c<double, bool>};
 
+    func_map_logic_1arg_1type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL>(fmap);
+    func_map_logic_2arg_2type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL>(fmap);
+
     return;
 }
diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp
index 4411e207003d..568db448d966 100644
--- a/dpnp/backend/kernels/dpnp_krnl_random.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -37,6 +37,9 @@
 #include "queue_sycl.hpp"
 #include "dpnp_random_state.hpp"
 
+static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_VERSION_REQUIRED,
+              "MKL does not meet minimum version requirement"); // the RNG code in this file uses the sycl::span API unconditionally
+
 namespace mkl_blas = oneapi::mkl::blas;
 namespace mkl_rng = oneapi::mkl::rng;
 namespace mkl_vm = oneapi::mkl::vm;
@@ -990,11 +993,7 @@ DPCTLSyclEventRef dpnp_rng_multinomial_c(DPCTLSyclQueueRef q_ref,
             DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result, size, true, true);
             _DataType* result1 = result_ptr.get_ptr();
 
-#if (INTEL_MKL_VERSION < __INTEL_MKL_2023_SWITCHOVER)
-            std::vector<double> p(p_data, p_data + p_size);
-#else
             auto p = sycl::span<double>{p_data, p_size};
-#endif
             mkl_rng::multinomial<_DataType> distribution(ntrial, p);
 
             // perform generation
@@ -1082,13 +1081,8 @@ DPCTLSyclEventRef dpnp_rng_multivariate_normal_c(DPCTLSyclQueueRef q_ref,
 
     _DataType* result1 = static_cast<_DataType *>(result);
 
-#if (INTEL_MKL_VERSION < __INTEL_MKL_2023_SWITCHOVER)
-    std::vector<double> mean(mean_data, mean_data + mean_size);
-    std::vector<double> cov(cov_data, cov_data + cov_size);
-#else
     auto mean = sycl::span<double>{mean_data, mean_size};
     auto cov = sycl::span<double>{cov_data, cov_size};
-#endif
 
     // `result` is a array for random numbers
     // `size` is a `result`'s len.
diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp
index 5b10bc71a8be..742e6dff3783 100644
--- a/dpnp/backend/src/dpnp_fptr.hpp
+++ b/dpnp/backend/src/dpnp_fptr.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,9 @@
 #define BACKEND_FPTR_H
 
 #include <map>
+#include <complex>
+
+#include <CL/sycl.hpp>
 
 #include <dpnp_iface_fptr.hpp>
 
@@ -64,6 +67,120 @@ const DPNPFuncType eft_C64 = DPNPFuncType::DPNP_FT_CMPLX64;
 const DPNPFuncType eft_C128 = DPNPFuncType::DPNP_FT_CMPLX128;
 const DPNPFuncType eft_BLN = DPNPFuncType::DPNP_FT_BOOL;
 
+/**
+ * An internal structure pairing a data type enum value (FuncType) with a C++ type (T).
+ */
+template <DPNPFuncType FuncType, typename T>
+struct func_type_pair_t
+{
+   using type = T; // the C++ type paired with FuncType
+   // Lookup hook: the integral_constant tag argument lets overload resolution select the pair for FuncType.
+   static func_type_pair_t get_pair(std::integral_constant<DPNPFuncType, FuncType>) { return {}; }
+};
+
+/**
+ * An internal structure building a compile-time map from data type enum values to C++ types out of pairs Ps.
+ */
+template <typename ... Ps>
+struct func_type_map_factory_t : public Ps...
+{
+   using Ps::get_pair...; // gather the get_pair overload of every inherited pair into one overload set
+   // find_type<FuncType>: resolve get_pair for the tag of FuncType and take `type` of the pair it returns.
+   template <DPNPFuncType FuncType>
+   using find_type = typename decltype(get_pair(std::integral_constant<DPNPFuncType, FuncType>{}))::type;
+};
+
+/**
+ * The FPTR interface map linking each data type enum value with its associated C++ type
+ */
+using func_type_map_t = func_type_map_factory_t<func_type_pair_t<eft_BLN, bool>,
+                                                func_type_pair_t<eft_INT, std::int32_t>,
+                                                func_type_pair_t<eft_LNG, std::int64_t>,
+                                                func_type_pair_t<eft_FLT, float>,
+                                                func_type_pair_t<eft_DBL, double>,
+                                                func_type_pair_t<eft_C64, std::complex<float>>,
+                                                func_type_pair_t<eft_C128, std::complex<double>>>;
+
+/**
+ * Return the enum value of the result type for inputs FT1 and FT2: the one with the greater enumerator value.
+ */
+template <DPNPFuncType FT1, DPNPFuncType FT2>
+static constexpr DPNPFuncType populate_func_types()
+{
+    if constexpr (FT1 == DPNPFuncType::DPNP_FT_NONE) // NOTE(review): condition is a template constant; static_assert may be intended - confirm
+    {
+        throw std::runtime_error("Templated enum value of FT1 is None");
+    }
+    else if constexpr (FT2 == DPNPFuncType::DPNP_FT_NONE)
+    {
+        throw std::runtime_error("Templated enum value of FT2 is None");
+    }
+    return (FT1 < FT2) ? FT2 : FT1; // FT1 is returned when both values are equal
+}
+
+/**
+ * @brief A helper that builds an object of type Op from the elements of v selected by the index pack I.
+ */
+template <typename Op, typename Vec, std::size_t... I>
+static auto dpnp_vec_cast_impl(const Vec& v, std::index_sequence<I...>)
+{
+    return Op{v[I]...}; // brace-initializes Op with v[I] for each index in the pack, in order
+}
+
+/**
+ * @brief A casting function for SYCL vector.
+ *
+ * @tparam dstT A result type upon casting.
+ * @tparam srcT An incoming type of the vector.
+ * @tparam N A number of elements in the vector.
+ * @tparam Indices A sequence of integers, std::make_index_sequence<N> by default.
+ * @param s An incoming SYCL vector to cast.
+ * @return SYCL vector cast to the destination type.
+ */
+template <typename dstT, typename srcT, std::size_t N, typename Indices = std::make_index_sequence<N>>
+static auto dpnp_vec_cast(const sycl::vec<srcT, N>& s)
+{
+    return dpnp_vec_cast_impl<sycl::vec<dstT, N>, sycl::vec<srcT, N>>(s, Indices{});
+}
+
+/**
+ * Expands to exactly the arguments it is given (__VA_ARGS__).
+ * Intended for operation macros: removes the parentheses around a comma-separated list of types.
+ */
+#define MACRO_UNPACK_TYPES(...) __VA_ARGS__
+
+/**
+ * Implements std::is_same<> with variadic number of types to compare with:
+ * true when type T matches at least one of types Ts.
+ */
+template <typename T, typename... Ts>
+struct is_any : std::disjunction<std::is_same<T, Ts>...> {}; // an empty Ts yields false
+
+/**
+ * Implements std::is_same<> with variadic number of types to compare with:
+ * true when type T matches every type from Ts sequence.
+ */
+template <typename T, typename... Ts>
+struct are_same : std::conjunction<std::is_same<T, Ts>...> {}; // an empty Ts yields true
+
+/**
+ * A template constant to check that T1 and T2 are the same type and that this type is one of Ts.
+ */
+template <typename T1, typename T2, typename... Ts>
+constexpr bool both_types_are_same = is_any<T1, Ts...>::value && are_same<T1, T2>::value;
+
+/**
+ * A template constant to check that each of types T1 and T2 matches some type from Ts.
+ */
+template <typename T1, typename T2, typename... Ts>
+constexpr bool both_types_are_any_of = is_any<T1, Ts...>::value && is_any<T2, Ts...>::value;
+
+/**
+ * A template constant to check that neither T1 nor T2 matches any type from Ts sequence.
+ */
+template <typename T1, typename T2, typename... Ts>
+constexpr bool none_of_both_types = !is_any<T1, Ts...>::value && !is_any<T2, Ts...>::value;
+
 /**
  * FPTR interface initialization functions
  */
diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp
index 33f4d750067f..985d5a61494e 100644
--- a/dpnp/backend/src/dpnp_utils.hpp
+++ b/dpnp/backend/src/dpnp_utils.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -45,15 +45,15 @@
  * Intel(R) oneAPI DPC++ 2022.2.1 compiler has version 20221020L on Linux and
  * 20221101L on Windows.
  */
-#ifndef __SYCL_COMPILER_2023_SWITCHOVER
-#define __SYCL_COMPILER_2023_SWITCHOVER 20221102L
+#ifndef __SYCL_COMPILER_VERSION_REQUIRED
+#define __SYCL_COMPILER_VERSION_REQUIRED 20221102L
 #endif
 
 /**
  * Version of Intel MKL at which transition to OneMKL release 2023.0.0 occurs.
  */
-#ifndef __INTEL_MKL_2023_SWITCHOVER
-#define __INTEL_MKL_2023_SWITCHOVER 20230000
+#ifndef __INTEL_MKL_2023_VERSION_REQUIRED
+#define __INTEL_MKL_2023_VERSION_REQUIRED 20230000
 #endif
 
 /**
diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp
index dab09622a698..6c81f5267787 100644
--- a/dpnp/backend/src/dpnpc_memory_adapter.hpp
+++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2022, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -85,10 +85,6 @@ class DPNPC_ptr_adapter final
             std::cerr << "\n\t size_in_bytes=" << size_in_bytes;
             std::cerr << "\n\t pointer type=" << (long)src_ptr_type;
             std::cerr << "\n\t queue inorder=" << queue.is_in_order();
-#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER)
-            std::cerr << "\n\t queue is_host=" << queue.is_host();
-            std::cerr << "\n\t queue device is_host=" << queue.get_device().is_host();
-#endif
             std::cerr << "\n\t queue device is_cpu=" << queue.get_device().is_cpu();
             std::cerr << "\n\t queue device is_gpu=" << queue.get_device().is_gpu();
             std::cerr << "\n\t queue device is_accelerator=" << queue.get_device().is_accelerator();
diff --git a/dpnp/backend/src/queue_sycl.cpp b/dpnp/backend/src/queue_sycl.cpp
index 0810ed0aaba8..55f78230d64e 100644
--- a/dpnp/backend/src/queue_sycl.cpp
+++ b/dpnp/backend/src/queue_sycl.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -215,11 +215,6 @@ bool backend_sycl::backend_sycl_is_cpu()
     if (qptr.get_device().is_cpu()) {
         return true;
     }
-#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER)
-    else if (qptr.is_host() || qptr.get_device().is_host()) {
-        return true;
-    }
-#endif
 
     return false;
 }
diff --git a/dpnp/backend/src/queue_sycl.hpp b/dpnp/backend/src/queue_sycl.hpp
index af03e1b6f121..8683fdd5737d 100644
--- a/dpnp/backend/src/queue_sycl.hpp
+++ b/dpnp/backend/src/queue_sycl.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -113,7 +113,7 @@ class backend_sycl
     static void backend_sycl_queue_init(QueueOptions selector = QueueOptions::CPU_SELECTOR);
 
     /**
-     * Return True if current @ref queue is related to cpu or host device
+     * Return True if current @ref queue is related to cpu device
      */
     static bool backend_sycl_is_cpu();
 
@@ -137,6 +137,13 @@ class backend_sycl
 #else
         // temporal solution. Started from Sept-2020
         DPCTLSyclQueueRef DPCtrl_queue = DPCTLQueueMgr_GetCurrentQueue();
+        if (DPCtrl_queue == nullptr)
+        {
+            std::string reason = (DPCTLQueueMgr_GetQueueStackSize() == static_cast<size_t>(-1))
+                                     ? ": the queue stack is empty, probably no device is available."
+                                     : ".";
+            throw std::runtime_error("Failed to create a copy of SYCL queue with default device" + reason);
+        }
         return *(reinterpret_cast<sycl::queue*>(DPCtrl_queue));
 #endif
     }
diff --git a/dpnp/dparray.pyx b/dpnp/dparray.pyx
index 859bf49d59a8..dffbf6f65d15 100644
--- a/dpnp/dparray.pyx
+++ b/dpnp/dparray.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -462,7 +462,7 @@ cdef class dparray:
             return ( < long * > self._dparray_data)[lin_idx]
         elif self.dtype == numpy.int32:
             return ( < int * > self._dparray_data)[lin_idx]
-        elif self.dtype == numpy.bool:
+        elif self.dtype == numpy.bool_:
             return ( < cpp_bool * > self._dparray_data)[lin_idx]
         elif self.dtype == numpy.complex128:
             return ( < double complex * > self._dparray_data)[lin_idx]
@@ -489,7 +489,7 @@ cdef class dparray:
             ( < long * > self._dparray_data)[lin_idx] = <long > value
         elif self.dtype == numpy.int32:
             ( < int * > self._dparray_data)[lin_idx] = <int > value
-        elif self.dtype == numpy.bool:
+        elif self.dtype == numpy.bool_:
             ( < cpp_bool * > self._dparray_data)[lin_idx] = < cpp_bool > value
         elif self.dtype == numpy.complex64:
             ( < float complex * > self._dparray_data)[lin_idx] = <float complex > value
@@ -876,7 +876,7 @@ cdef class dparray:
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex):
+        if not numpy.issubsctype(self.dtype, numpy.complex_):
             return self
         else:
             return conjugate(self)
@@ -889,7 +889,7 @@ cdef class dparray:
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex):
+        if not numpy.issubsctype(self.dtype, numpy.complex_):
             return self
         else:
             return conjugate(self)
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index e0c82b6125ce..9bf161b0aaf7 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -127,6 +127,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_EIG_EXT
         DPNP_FN_EIGVALS
         DPNP_FN_EIGVALS_EXT
+        DPNP_FN_EQUAL_EXT
         DPNP_FN_ERF
         DPNP_FN_ERF_EXT
         DPNP_FN_EYE
@@ -155,6 +156,8 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_FMOD_EXT
         DPNP_FN_FULL
         DPNP_FN_FULL_LIKE
+        DPNP_FN_GREATER_EXT
+        DPNP_FN_GREATER_EQUAL_EXT
         DPNP_FN_HYPOT
         DPNP_FN_HYPOT_EXT
         DPNP_FN_IDENTITY
@@ -169,6 +172,8 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_KRON_EXT
         DPNP_FN_LEFT_SHIFT
         DPNP_FN_LEFT_SHIFT_EXT
+        DPNP_FN_LESS_EXT
+        DPNP_FN_LESS_EQUAL_EXT
         DPNP_FN_LOG
         DPNP_FN_LOG_EXT
         DPNP_FN_LOG10
@@ -177,6 +182,10 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_LOG1P_EXT
         DPNP_FN_LOG2
         DPNP_FN_LOG2_EXT
+        DPNP_FN_LOGICAL_AND_EXT
+        DPNP_FN_LOGICAL_NOT_EXT
+        DPNP_FN_LOGICAL_OR_EXT
+        DPNP_FN_LOGICAL_XOR_EXT
         DPNP_FN_MATMUL
         DPNP_FN_MATMUL_EXT
         DPNP_FN_MATRIX_RANK
@@ -203,6 +212,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_NEGATIVE_EXT
         DPNP_FN_NONZERO
         DPNP_FN_NONZERO_EXT
+        DPNP_FN_NOT_EQUAL_EXT
         DPNP_FN_ONES
         DPNP_FN_ONES_LIKE
         DPNP_FN_PARTITION
@@ -364,6 +374,8 @@ cdef extern from "dpnp_iface_fptr.hpp":
     struct DPNPFuncData:
         DPNPFuncType return_type
         void * ptr
+        DPNPFuncType return_type_no_fp64
+        void *ptr_no_fp64
 
     DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except +
 
@@ -379,7 +391,7 @@ cdef extern from "constants.hpp":
 
 cdef extern from "dpnp_iface.hpp":
     void dpnp_queue_initialize_c(QueueOptions selector)
-    size_t dpnp_queue_is_cpu_c()
+    size_t dpnp_queue_is_cpu_c() except +
 
     char * dpnp_memory_alloc_c(size_t size_in_bytes) except +
     void dpnp_memory_free_c(void * ptr)
@@ -429,7 +441,7 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQu
                                                              const shape_elem_type * ,
                                                              const shape_elem_type * ,
                                                              const long * ,
-                                                             const c_dpctl.DPCTLEventVectorRef)
+                                                             const c_dpctl.DPCTLEventVectorRef) except +
 ctypedef void(*fptr_blas_gemm_2in_1out_t)(void *, void * , void * , size_t, size_t, size_t)
 ctypedef c_dpctl.DPCTLSyclEventRef(*dpnp_reduction_c_t)(c_dpctl.DPCTLSyclQueueRef,
                                                         void *,
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 41f0c0c01026..f12707ccc761 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -221,7 +221,7 @@ cpdef dpnp_queue_initialize():
 
 
 cpdef dpnp_queue_is_cpu():
-    """Return 1 if current queue is CPU or HOST. Return 0 otherwise.
+    """Return 1 if current queue is CPU. Return 0 otherwise.
 
     """
     return dpnp_queue_is_cpu_c()
@@ -276,7 +276,7 @@ cdef dpnp_DPNPFuncType_to_dtype(size_t type):
     elif type == <size_t > DPNP_FT_CMPLX128:
         return numpy.complex128
     elif type == <size_t > DPNP_FT_BOOL:
-        return numpy.bool
+        return numpy.bool_
     else:
         utils.checker_throw_type_error("dpnp_DPNPFuncType_to_dtype", type)
 
@@ -481,8 +481,6 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(fptr_name, x1_c_type, x2_c_type)
 
-    result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type)
-
     # Create result array
     cdef shape_type_c x1_shape = x1_obj.shape
 
@@ -495,15 +493,26 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
 
     result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj)
 
+    # get FPTR function and return type
+    cdef fptr_2in_1out_strides_t func = NULL
+    cdef DPNPFuncType return_type = DPNP_FT_NONE
+    if fptr_name != DPNP_FN_DIVIDE_EXT or result_sycl_device.has_aspect_fp64:
+        return_type = kernel_data.return_type
+        func = < fptr_2in_1out_strides_t > kernel_data.ptr
+    else:
+        return_type = kernel_data.return_type_no_fp64
+        func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64
+
     if out is None:
         """ Create result array with type given by FPTR data """
         result = utils.create_output_descriptor(result_shape,
-                                                kernel_data.return_type,
+                                                return_type,
                                                 None,
                                                 device=result_sycl_device,
                                                 usm_type=result_usm_type,
                                                 sycl_queue=result_sycl_queue)
     else:
+        result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type)
         if out.dtype != result_type:
             utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
         if out.shape != result_shape:
@@ -517,11 +526,10 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
 
     result_obj = result.get_array()
 
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_obj.sycl_queue
+    cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > result_obj.sycl_queue
     cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
 
     """ Call FPTR function """
-    cdef fptr_2in_1out_strides_t func = <fptr_2in_1out_strides_t > kernel_data.ptr
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
                                                     result.get_data(),
                                                     result.size,
diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
index c1c24a27747b..cb44a08db598 100644
--- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,6 @@ and the rest of the library
 __all__ += [
     "dpnp_copy",
     "dpnp_diag",
-    "dpnp_eye",
     "dpnp_geomspace",
     "dpnp_identity",
     "dpnp_linspace",
@@ -46,8 +45,6 @@ __all__ += [
     "dpnp_ptp",
     "dpnp_trace",
     "dpnp_tri",
-    "dpnp_tril",
-    "dpnp_triu",
     "dpnp_vander",
 ]
 
@@ -84,9 +81,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_1out_func_ptr_t)(c_dpctl.DPC
                                                                      const size_t ,
                                                                      const int,
                                                                      const c_dpctl.DPCTLEventVectorRef) except +
-ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_eye_t)(c_dpctl.DPCTLSyclQueueRef,
-                                                     void *, int , const shape_elem_type * ,
-                                                     const c_dpctl.DPCTLEventVectorRef)
 ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_trace_t)(c_dpctl.DPCTLSyclQueueRef,
                                                        const void *,
                                                        void * ,
@@ -146,36 +140,6 @@ cpdef utils.dpnp_descriptor dpnp_diag(utils.dpnp_descriptor v, int k):
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_eye(N, M=None, k=0, dtype=None):
-    if dtype is None:
-        dtype = dpnp.float64
-
-    if M is None:
-        M = N
-
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
-
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EYE_EXT, param1_type, param1_type)
-
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor((N, M), kernel_data.return_type, None)
-
-    result_sycl_queue = result.get_array().sycl_queue
-
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
-
-    cdef fptr_dpnp_eye_t func = <fptr_dpnp_eye_t > kernel_data.ptr
-
-    cdef shape_type_c result_shape = result.shape
-
-    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), k, result_shape.data(), NULL)
-
-    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-    c_dpctl.DPCTLEvent_Delete(event_ref)
-
-    return result
-
-
 cpdef utils.dpnp_descriptor dpnp_geomspace(start, stop, num, endpoint, dtype, axis):
     cdef shape_type_c obj_shape = utils._object_to_tuple(num)
     cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(obj_shape, dtype, None)
@@ -434,7 +398,7 @@ cpdef utils.dpnp_descriptor dpnp_trace(utils.dpnp_descriptor arr, offset=0, axis
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=numpy.float):
+cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=dpnp.float):
     if M is None:
         M = N
 
@@ -460,94 +424,6 @@ cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=numpy.float):
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_tril(utils.dpnp_descriptor m, int k):
-    cdef shape_type_c input_shape = m.shape
-    cdef shape_type_c result_shape
-
-    if m.ndim == 1:
-        result_shape = (m.shape[0], m.shape[0])
-    else:
-        result_shape = m.shape
-
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype)
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIL_EXT, param1_type, param1_type)
-
-    m_obj = m.get_array()
-
-    # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
-                                                                       kernel_data.return_type,
-                                                                       None,
-                                                                       device=m_obj.sycl_device,
-                                                                       usm_type=m_obj.usm_type,
-                                                                       sycl_queue=m_obj.sycl_queue)
-
-    result_sycl_queue = result.get_array().sycl_queue
-
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
-
-    cdef custom_1in_1out_func_ptr_t func = <custom_1in_1out_func_ptr_t > kernel_data.ptr
-    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
-                                                    m.get_data(),
-                                                    result.get_data(),
-                                                    k,
-                                                    input_shape.data(),
-                                                    result_shape.data(),
-                                                    m.ndim,
-                                                    result.ndim,
-                                                    NULL)  # dep_events_ref
-
-    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-    c_dpctl.DPCTLEvent_Delete(event_ref)
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k):
-    cdef shape_type_c input_shape = m.shape
-    cdef shape_type_c result_shape
-
-    if m.ndim == 1:
-        result_shape = (m.shape[0], m.shape[0])
-    else:
-        result_shape = m.shape
-
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype)
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIU_EXT, param1_type, param1_type)
-
-    m_obj = m.get_array()
-
-    # ceate result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
-                                                                       kernel_data.return_type,
-                                                                       None,
-                                                                       device=m_obj.sycl_device,
-                                                                       usm_type=m_obj.usm_type,
-                                                                       sycl_queue=m_obj.sycl_queue)
-
-    result_sycl_queue = result.get_array().sycl_queue
-
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
-
-    cdef custom_1in_1out_func_ptr_t func = <custom_1in_1out_func_ptr_t > kernel_data.ptr
-    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
-                                                    m.get_data(),
-                                                    result.get_data(),
-                                                    k,
-                                                    input_shape.data(),
-                                                    result_shape.data(),
-                                                    m.ndim,
-                                                    result.ndim,
-                                                    NULL)  # dep_events_ref
-
-    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-    c_dpctl.DPCTLEvent_Delete(event_ref)
-
-    return result
-
-
 cpdef utils.dpnp_descriptor dpnp_vander(utils.dpnp_descriptor x1, int N, int increasing):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_VANDER_EXT, param1_type, DPNP_FT_NONE)
diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pyx
index e0b928ddf025..b6ac36db412b 100644
--- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_logic.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -166,46 +166,28 @@ cpdef utils.dpnp_descriptor dpnp_any(utils.dpnp_descriptor array1):
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] == input2.get_pyobj()[i])
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] > input2.get_pyobj()[i])
+cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor x1_obj,
+                                       utils.dpnp_descriptor x2_obj,
+                                       object dtype=None,
+                                       utils.dpnp_descriptor out=None,
+                                       object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="equal")
 
-    return result
 
+cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor x1_obj,
+                                         utils.dpnp_descriptor x2_obj,
+                                         object dtype=None,
+                                         utils.dpnp_descriptor out=None,
+                                         object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_GREATER_EXT, x1_obj, x2_obj, dtype, out, where, func_name="greater")
 
-cpdef utils.dpnp_descriptor dpnp_greater_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] >= input2.get_pyobj()[i])
 
-    return result
+cpdef utils.dpnp_descriptor dpnp_greater_equal(utils.dpnp_descriptor x1_obj,
+                                               utils.dpnp_descriptor x2_obj,
+                                               object dtype=None,
+                                               utils.dpnp_descriptor out=None,
+                                               object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_GREATER_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="greater_equal")
 
 
 cpdef utils.dpnp_descriptor dpnp_isclose(utils.dpnp_descriptor input1,
@@ -272,103 +254,56 @@ cpdef utils.dpnp_descriptor dpnp_isnan(utils.dpnp_descriptor input1):
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] < input2.get_pyobj()[i])
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] <= input2.get_pyobj()[i])
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-
-    for i in range(result.size):
-        result.get_pyobj()[i] = numpy.logical_and(input1.get_pyobj()[i], input2.get_pyobj()[i])
-
-    return result
-
-
-cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor input1):
-    input1_obj = input1.get_array()
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=input1_obj.sycl_device,
-                                                                             usm_type=input1_obj.usm_type,
-                                                                             sycl_queue=input1_obj.sycl_queue)
-
-    for i in range(result.size):
-        result.get_pyobj()[i] = numpy.logical_not(input1.get_pyobj()[i])
+cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor x1_obj,
+                                      utils.dpnp_descriptor x2_obj,
+                                      object dtype=None,
+                                      utils.dpnp_descriptor out=None,
+                                      object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LESS_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less")
 
-    return result
 
+cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor x1_obj,
+                                            utils.dpnp_descriptor x2_obj,
+                                            object dtype=None,
+                                            utils.dpnp_descriptor out=None,
+                                            object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LESS_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less_equal")
 
-cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
 
-    for i in range(result.size):
-        result.get_pyobj()[i] = numpy.logical_or(input1.get_pyobj()[i], input2.get_pyobj()[i])
+cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor x1_obj,
+                                             utils.dpnp_descriptor x2_obj,
+                                             object dtype=None,
+                                             utils.dpnp_descriptor out=None,
+                                             object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_AND_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_and")
 
-    return result
 
+cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor x_obj,
+                                            object dtype=None,
+                                            utils.dpnp_descriptor out=None,
+                                            object where=True):
+    return call_fptr_1in_1out_strides(DPNP_FN_LOGICAL_NOT_EXT, x_obj, dtype, out, where, func_name="logical_not")
 
-cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
 
-    for i in range(result.size):
-        result.get_pyobj()[i] = numpy.logical_xor(input1.get_pyobj()[i], input2.get_pyobj()[i])
+cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor x1_obj,
+                                            utils.dpnp_descriptor x2_obj,
+                                            object dtype=None,
+                                            utils.dpnp_descriptor out=None,
+                                            object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_OR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_or")
 
-    return result
 
+cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor x1_obj,
+                                             utils.dpnp_descriptor x2_obj,
+                                             object dtype=None,
+                                             utils.dpnp_descriptor out=None,
+                                             object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_XOR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_xor")
 
-cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape,
-                                                                             dpnp.bool,
-                                                                             None,
-                                                                             device=result_sycl_device,
-                                                                             usm_type=result_usm_type,
-                                                                             sycl_queue=result_sycl_queue)
-    for i in range(result.size):
-        result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] != input2.get_pyobj()[i])
 
-    return result
+cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor x1_obj,
+                                           utils.dpnp_descriptor x2_obj,
+                                           object dtype=None,
+                                           utils.dpnp_descriptor out=None,
+                                           object where=True):
+    return call_fptr_2in_1out_strides(DPNP_FN_NOT_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="not_equal")
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 82c271fa7d90..c50ed9792720 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -140,7 +140,10 @@ def __bool__(self):
         return self._array_obj.__bool__()
 
  # '__class__',
- # '__complex__',
+
+    def __complex__(self):
+        return self._array_obj.__complex__()
+
  # '__contains__',
  # '__copy__',
  # '__deepcopy__',
@@ -150,6 +153,12 @@ def __bool__(self):
  # '__divmod__',
  # '__doc__',
 
+    def __dlpack__(self, stream=None):
+        return self._array_obj.__dlpack__(stream=stream)
+
+    def __dlpack_device__(self):
+        return self._array_obj.__dlpack_device__()
+
     def __eq__(self, other):
         return dpnp.equal(self, other)
 
@@ -187,7 +196,10 @@ def __gt__(self, other):
  # '__imatmul__',
  # '__imod__',
  # '__imul__',
- # '__index__',
+
+    def __index__(self):
+        return self._array_obj.__index__()
+
  # '__init__',
  # '__init_subclass__',
 
@@ -247,7 +259,10 @@ def __radd__(self, other):
  # '__rdivmod__',
  # '__reduce__',
  # '__reduce_ex__',
- # '__repr__',
+
+    def __repr__(self):
+        return dpt.usm_ndarray_repr(self._array_obj, prefix="array")
+
  # '__rfloordiv__',
  # '__rlshift__',
 
@@ -264,7 +279,9 @@ def __rmul__(self, other):
  # '__rpow__',
  # '__rrshift__',
  # '__rshift__',
- # '__rsub__',
+
+    def __rsub__(self, other):
+        return dpnp.subtract(other, self)
 
     def __rtruediv__(self, other):
         return dpnp.true_divide(other, self)
@@ -292,8 +309,7 @@ def __str__(self):
 
         """
 
-        return str(self.asnumpy())
-
+        return self._array_obj.__str__()
 
     def __sub__(self, other):
         return dpnp.subtract(self, other)
@@ -305,6 +321,16 @@ def __truediv__(self, other):
 
  # '__xor__',
 
+    @staticmethod
+    def _create_from_usm_ndarray(usm_ary: dpt.usm_ndarray):
+        """Wrap an existing `dpctl.tensor.usm_ndarray` into a new `dpnp_array`."""
+        if not isinstance(usm_ary, dpt.usm_ndarray):
+            got = type(usm_ary)
+            raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {got}")
+        wrapped = dpnp_array.__new__(dpnp_array)
+        wrapped._array_obj = usm_ary  # shares the given array; __init__ is bypassed
+        return wrapped
+
     def all(self, axis=None, out=None, keepdims=False):
         """
         Returns True if all elements evaluate to True.
@@ -493,7 +519,7 @@ def conj(self):
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex):
+        if not numpy.issubsctype(self.dtype, numpy.complex_):
             return self
         else:
             return dpnp.conjugate(self)
@@ -506,7 +532,7 @@ def conjugate(self):
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex):
+        if not numpy.issubsctype(self.dtype, numpy.complex_):
             return self
         else:
             return dpnp.conjugate(self)
diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index 93ab716eb59a..75e20f8a0cb6 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -45,8 +45,11 @@
     "arange",
     "asarray",
     "empty",
+    "eye",
     "full",
-    "ones"
+    "ones",
+    "tril",
+    "triu",
     "zeros",
 ]
 
@@ -150,6 +153,33 @@ def full(shape,
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
+def eye(N,
+        M=None,
+        /,
+        *,
+        k=0,
+        dtype=None,
+        order="C",
+        device=None,
+        usm_type="device",
+        sycl_queue=None):
+    """Validate input parameters before passing them into `dpctl.tensor` module"""
+    dpu.validate_usm_type(usm_type, allow_none=False)
+    sycl_queue_normalized = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue, device=device)
+    if order is None:
+        order = 'C'
+
+    """Creates `dpnp_array` with ones on the `k`th diagonal."""
+    array_obj = dpt.eye(N,
+                        M,
+                        k=k,
+                        dtype=dtype,
+                        order=order,
+                        usm_type=usm_type,
+                        sycl_queue=sycl_queue_normalized)
+    return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
+
+
 def ones(shape,
          *,
          dtype=None,
@@ -172,6 +202,18 @@ def ones(shape,
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
+def tril(x1, /, *, k=0):
+    """Creates `dpnp_array` as lower triangular part of an input array."""
+    array_obj = dpt.tril(x1.get_array() if isinstance(x1, dpnp_array) else x1, k=k)
+    return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
+
+
+def triu(x1, /, *, k=0):
+    """Creates `dpnp_array` as upper triangular part of an input array."""
+    array_obj = dpt.triu(x1.get_array() if isinstance(x1, dpnp_array) else x1, k=k)
+    return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
+
+
 def zeros(shape,
           *,
           dtype=None,
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 4e791ad0eaf9..b7cdef8cc615 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -61,15 +61,18 @@
     "asnumpy",
     "astype",
     "convert_single_elem_array_to_scalar",
+    "default_float_type",
     "dpnp_queue_initialize",
     "dpnp_queue_is_cpu",
+    "from_dlpack",
     "get_dpnp_descriptor",
     "get_include",
     "get_normalized_queue_device"
 ]
 
 from dpnp import (
-    isscalar
+    isscalar,
+    float64
 )
 
 from dpnp.dpnp_iface_arraycreation import *
@@ -191,9 +194,64 @@ def convert_single_elem_array_to_scalar(obj, keepdims=False):
     return obj
 
 
+def default_float_type(device=None, sycl_queue=None):
+    """
+    Return a floating type used by default in DPNP depending on device capabilities.
+
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where an array of default floating type might be created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+        The value ``None`` is interpreted as to use a default device.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue which might be used to create an array of default floating type.
+        The `sycl_queue` can be ``None`` (the default), which is interpreted as
+        to get the SYCL queue from `device` keyword if present or to use a default queue.
+
+    Returns
+    -------
+    dt : dtype
+        A default DPNP floating type.
+
+    """
+
+    _sycl_queue = get_normalized_queue_device(device=device, sycl_queue=sycl_queue)
+    return map_dtype_to_device(float64, _sycl_queue.sycl_device)
+
+
+def from_dlpack(obj, /):
+    """
+    Create a dpnp array from a Python object implementing the ``__dlpack__``
+    protocol.
+
+    See https://dmlc.github.io/dlpack/latest/ for more details.
+
+    Parameters
+    ----------
+    obj : object
+        A Python object representing an array that implements the ``__dlpack__``
+        and ``__dlpack_device__`` methods.
+
+    Returns
+    -------
+    out : dpnp_array
+        Returns a new dpnp array containing the data from another array
+        (obj) with the ``__dlpack__`` method on the same device as object.
+
+    """
+
+    usm_ary = dpt.from_dlpack(obj)
+    return dpnp_array._create_from_usm_ndarray(usm_ary)
+
+
 def get_dpnp_descriptor(ext_obj,
                         copy_when_strides=True,
                         copy_when_nondefault_queue=True,
+                        alloc_usm_type=None,
                         alloc_queue=None):
     """
     Return True:
@@ -214,9 +272,9 @@ def get_dpnp_descriptor(ext_obj,
         return False
 
     # If input object is a scalar, it means it was allocated on host memory.
-    # We need to copy it to device memory according to compute follows data paradigm.
+    # We need to copy it to USM memory according to compute follows data paradigm.
     if isscalar(ext_obj):
-        ext_obj = array(ext_obj, sycl_queue=alloc_queue)
+        ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue)
 
     # while dpnp functions have no implementation with strides support
     # we need to create a non-strided copy
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 5fb4d8c7a4da..5b062a346b97 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -42,12 +42,14 @@
 
 import numpy
 import dpnp
+import operator
 
 import dpnp.config as config
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
 
 import dpnp.dpnp_container as dpnp_container
+import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -530,7 +532,7 @@ def empty_like(x1,
 
     Limitations
     -----------
-    Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`.
+    Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`
     Parameter ``order`` is supported with values ``"C"`` or ``"F"``.
     Parameter ``subok`` is supported only with default value ``False``.
     Otherwise the function will be executed sequentially on CPU.
@@ -552,7 +554,7 @@ def empty_like(x1,
 
     """
 
-    if not isinstance(x1, dpnp.ndarray):
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
         pass
     elif order not in ('C', 'c', 'F', 'f', None):
         pass
@@ -572,31 +574,43 @@ def empty_like(x1,
     return call_origin(numpy.empty_like, x1, dtype, order, subok, shape)
 
 
-def eye(N, M=None, k=0, dtype=None, order='C', **kwargs):
+def eye(N,
+        M=None,
+        /,
+        *,
+        k=0,
+        dtype=None,
+        order="C",
+        like=None,
+        device=None,
+        usm_type="device",
+        sycl_queue=None):
     """
     Return a 2-D array with ones on the diagonal and zeros elsewhere.
     For full documentation refer to :obj:`numpy.eye`.
 
     Limitations
     -----------
-    Input array is supported as :obj:`dpnp.ndarray`.
-    Parameters ``order`` is supported only with default value.
+    Parameter ``order`` is supported only with values ``"C"`` and ``"F"``.
+    Parameter ``like`` is supported only with default value ``None``.
+    Otherwise the function will be executed sequentially on CPU.
+
     """
-    if (not use_origin_backend()):
-        if not isinstance(N, (int, dpnp.int, dpnp.int32, dpnp.int64)):
-            pass
-        elif M is not None and not isinstance(M, (int, dpnp.int, dpnp.int32, dpnp.int64)):
-            pass
-        elif not isinstance(k, (int, dpnp.int, dpnp.int32, dpnp.int64)):
-            pass
-        elif order != 'C':
-            pass
-        elif len(kwargs) != 0:
-            pass
-        else:
-            return dpnp_eye(N, M=M, k=k, dtype=dtype).get_pyobj()
+    if order not in ('C', 'c', 'F', 'f', None):
+        pass
+    elif like is not None:
+        pass
+    else:
+        return dpnp_container.eye(N,
+                                  M,
+                                  k=k,
+                                  dtype=dtype,
+                                  order=order,
+                                  device=device,
+                                  usm_type=usm_type,
+                                  sycl_queue=sycl_queue)
 
-    return call_origin(numpy.eye, N, M=M, k=k, dtype=dtype, order=order, **kwargs)
+    return call_origin(numpy.eye, N, M, k=k, dtype=dtype, order=order, like=None)
 
 
 def frombuffer(buffer, **kwargs):
@@ -750,7 +764,7 @@ def full_like(x1,
 
     Limitations
     -----------
-    Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`.
+    Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`.
     Parameter ``order`` is supported only with values ``"C"`` and ``"F"``.
     Parameter ``subok`` is supported only with default value ``False``.
     Otherwise the function will be executed sequentially on CPU.
@@ -771,7 +785,7 @@ def full_like(x1,
     [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
 
     """
-    if not isinstance(x1, dpnp.ndarray):
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
         pass
     elif order not in ('C', 'c', 'F', 'f', None):
         pass
@@ -859,10 +873,8 @@ def identity(n, dtype=None, *, like=None):
         elif n < 0:
             pass
         else:
-            if dtype is None:
-                sycl_queue = dpnp.get_normalized_queue_device(sycl_queue=None, device=None)
-                dtype = map_dtype_to_device(dpnp.float64, sycl_queue.sycl_device)
-            return dpnp_identity(n, dtype).get_pyobj()
+            _dtype = dpnp.default_float_type() if dtype is None else dtype
+            return dpnp_identity(n, _dtype).get_pyobj()
 
     return call_origin(numpy.identity, n, dtype=dtype, like=like)
 
@@ -1179,7 +1191,7 @@ def ones_like(x1,
 
     Limitations
     -----------
-    Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`.
+    Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`.
     Parameter ``order`` is supported with values ``"C"`` or ``"F"``.
     Parameter ``subok`` is supported only with default value ``False``.
     Otherwise the function will be executed sequentially on CPU.
@@ -1201,7 +1213,7 @@ def ones_like(x1,
     [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
 
     """
-    if not isinstance(x1, dpnp.ndarray):
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
         pass
     elif order not in ('C', 'c', 'F', 'f', None):
         pass
@@ -1280,7 +1292,7 @@ def trace(x1, offset=0, axis1=0, axis2=1, dtype=None, out=None):
     return call_origin(numpy.trace, x1, offset, axis1, axis2, dtype, out)
 
 
-def tri(N, M=None, k=0, dtype=numpy.float, **kwargs):
+def tri(N, M=None, k=0, dtype=dpnp.float, **kwargs):
     """
     An array with ones at and below the given diagonal and zeros elsewhere.
 
@@ -1315,15 +1327,13 @@ def tri(N, M=None, k=0, dtype=numpy.float, **kwargs):
         elif not isinstance(k, int):
             pass
         else:
-            if dtype is numpy.float:
-                sycl_queue = dpnp.get_normalized_queue_device(sycl_queue=None, device=None)
-                dtype = map_dtype_to_device(dpnp.float64, sycl_queue.sycl_device)
-            return dpnp_tri(N, M, k, dtype).get_pyobj()
+            _dtype = dpnp.default_float_type() if dtype in (dpnp.float, None) else dtype
+            return dpnp_tri(N, M, k, _dtype).get_pyobj()
 
     return call_origin(numpy.tri, N, M, k, dtype, **kwargs)
 
 
-def tril(x1, k=0):
+def tril(x1, /, *, k=0):
     """
     Lower triangle of an array.
 
@@ -1331,6 +1341,12 @@ def tril(x1, k=0):
 
     For full documentation refer to :obj:`numpy.tril`.
 
+    Limitations
+    -----------
+    Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions.
+    Parameter `k` is supported only as a value of integer data type.
+    Otherwise the function will be executed sequentially on CPU.
+
     Examples
     --------
     >>> import dpnp as np
@@ -1342,17 +1358,25 @@ def tril(x1, k=0):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    if x1_desc:
-        if not isinstance(k, int):
-            pass
-        else:
-            return dpnp_tril(x1_desc, k).get_pyobj()
+    _k = None
+    try:
+        _k = operator.index(k)
+    except TypeError:
+        pass
+
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
+        pass
+    elif x1.ndim < 2:
+        pass
+    elif _k is None:
+        pass
+    else:
+        return dpnp_container.tril(x1, k=_k)
 
     return call_origin(numpy.tril, x1, k)
 
 
-def triu(x1, k=0):
+def triu(x1, /, *, k=0):
     """
     Upper triangle of an array.
 
@@ -1361,6 +1385,12 @@ def triu(x1, k=0):
 
     For full documentation refer to :obj:`numpy.triu`.
 
+    Limitations
+    -----------
+    Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions.
+    Parameter `k` is supported only as a value of integer data type.
+    Otherwise the function will be executed sequentially on CPU.
+
     Examples
     --------
     >>> import dpnp as np
@@ -1372,12 +1402,20 @@ def triu(x1, k=0):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    if x1_desc:
-        if not isinstance(k, int):
-            pass
-        else:
-            return dpnp_triu(x1_desc, k).get_pyobj()
+    _k = None
+    try:
+        _k = operator.index(k)
+    except TypeError:
+        pass
+
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
+        pass
+    elif x1.ndim < 2:
+        pass
+    elif _k is None:
+        pass
+    else:
+        return dpnp_container.triu(x1, k=_k)
 
     return call_origin(numpy.triu, x1, k)
 
@@ -1494,7 +1532,7 @@ def zeros_like(x1,
 
     Limitations
     -----------
-    Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`.
+    Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`.
     Parameter ``order`` is supported with values ``"C"`` or ``"F"``.
     Parameter ``subok`` is supported only with default value ``False``.
     Otherwise the function will be executed sequentially on CPU.
@@ -1515,8 +1553,8 @@ def zeros_like(x1,
     >>> [i for i in np.zeros_like(x)]
     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 
-"""
-    if not isinstance(x1, dpnp.ndarray):
+    """
+    if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)):
         pass
     elif order not in ('C', 'c', 'F', 'f', None):
         pass
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 0f1e1b5fc0e5..e94b0f6c1efb 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -219,18 +219,32 @@ def any(x1, axis=None, out=None, keepdims=False):
     return call_origin(numpy.any, x1, axis, out, keepdims)
 
 
-def equal(x1, x2):
+def equal(x1,
+          x2,
+          /,
+          out=None,
+          *,
+          where=True,
+          dtype=None,
+          subok=True):
     """
-    Return (x1 == x2) element-wise.
+    Return the truth value of (x1 == x2) element-wise.
 
     For full documentation refer to :obj:`numpy.equal`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    Parameter ``x1`` is supported as :obj:`dpnp.ndarray`.
-    Parameter ``x2`` is supported as either :obj:`dpnp.ndarray` or int.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Sizes, shapes and data types of input arrays ``x1`` and ``x2`` are supported to be equal.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both as scalars (at least one of `x1` or `x2` should be a :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
+    Otherwise the function will be executed sequentially on CPU.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -250,33 +264,57 @@ def equal(x1, x2):
     [True, True, False]
 
     """
-
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size != x2_desc.size:
-    #         pass
-    #     elif x1_desc.dtype != x2_desc.dtype:
-    #         pass
-    #     elif x1_desc.shape != x2_desc.shape:
-    #         pass
-    #     else:
-    #         return dpnp_equal(x1_desc, x2_desc).get_pyobj()
-
+    
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_equal(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.equal, x1, x2)
 
 
-def greater(x1, x2):
+def greater(x1,
+            x2,
+            /,
+            out=None,
+            *,
+            where=True,
+            dtype=None,
+            subok=True):
     """
-    Return (x1 > x2) element-wise.
+    Return the truth value of (x1 > x2) element-wise.
 
     For full documentation refer to :obj:`numpy.greater`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both as scalars (at least one of `x1` or `x2` should be a :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -297,30 +335,56 @@ def greater(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         return dpnp_greater(x1_desc, x2_desc).get_pyobj()
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_greater(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.greater, x1, x2)
 
 
-def greater_equal(x1, x2):
+def greater_equal(x1,
+                  x2,
+                  /,
+                  out=None,
+                  *,
+                  where=True,
+                  dtype=None,
+                  subok=True):
     """
-    Return (x1 >= x2) element-wise.
+    Return the truth value of (x1 >= x2) element-wise.
 
     For full documentation refer to :obj:`numpy.greater_equal`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both as scalars (at least one of `x1` or `x2` should be a :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -341,16 +405,27 @@ def greater_equal(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj()
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.greater_equal, x1, x2)
 
 
@@ -532,17 +607,32 @@ def isnan(x1, out=None, **kwargs):
     return call_origin(numpy.isnan, x1, out, **kwargs)
 
 
-def less(x1, x2):
+def less(x1,
+         x2,
+         /,
+         out=None,
+         *,
+         where=True,
+         dtype=None,
+         subok=True):
     """
-    Return (x1 < x2) element-wise.
+    Return the truth value of (x1 < x2) element-wise.
 
     For full documentation refer to :obj:`numpy.less`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both as scalars (at least one of `x1` or `x2` should be a :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -563,30 +653,56 @@ def less(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         return dpnp_less(x1_desc, x2_desc).get_pyobj()
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_less(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.less, x1, x2)
 
 
-def less_equal(x1, x2):
+def less_equal(x1,
+               x2,
+               /,
+               out=None,
+               *,
+               where=True,
+               dtype=None,
+               subok=True):
     """
-    Return (x1 <= x2) element-wise.
+    Return the truth value of (x1 <= x2) element-wise.
 
     For full documentation refer to :obj:`numpy.less_equal`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both as scalars (at least one of `x1` or `x2` should be a :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -607,32 +723,56 @@ def less_equal(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         return dpnp_less_equal(x1_desc, x2_desc).get_pyobj()
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_less_equal(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.less_equal, x1, x2)
 
 
-def logical_and(x1, x2, out=None, **kwargs):
+def logical_and(x1,
+                x2,
+                /,
+                out=None,
+                *,
+                where=True,
+                dtype=None,
+                subok=True):
     """
     Compute the truth value of x1 AND x2 element-wise.
 
     For full documentation refer to :obj:`numpy.logical_and`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise logical comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    Input arrays are supported as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both as scalars (at least one of `x1` or `x2` should be a :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``where`` is supported only with default value ``True``.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -652,30 +792,55 @@ def logical_and(x1, x2, out=None, **kwargs):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc and not kwargs:
-    #     if out is not None:
-    #         pass
-    #     else:
-    #         return dpnp_logical_and(x1_desc, x2_desc).get_pyobj()
-
-    return call_origin(numpy.logical_and, x1, x2, out, **kwargs)
-
-
-def logical_not(x1, out=None, **kwargs):
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_logical_and(x1_desc, x2_desc).get_pyobj()
+    return call_origin(numpy.logical_and, x1, x2)
+
+
+def logical_not(x,
+                /,
+                out=None,
+                *,
+                where=True,
+                dtype=None,
+                subok=True):
     """
     Compute the truth value of NOT x element-wise.
 
     For full documentation refer to :obj:`numpy.logical_not`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Boolean result with the same shape as `x` of the NOT operation
+        on elements of `x`.
+
     Limitations
     -----------
-    Input array is supported as :obj:`dpnp.ndarray`.
+    Parameter `x` is only supported as :class:`dpnp.ndarray`.
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``where`` is supported only with default value ``True``.
+    Input array data type is limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -693,29 +858,47 @@ def logical_not(x1, out=None, **kwargs):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # if x1_desc and not kwargs:
-    #     if out is not None:
-    #         pass
-    #     else:
-    #         return dpnp_logical_not(x1_desc).get_pyobj()
-
-    return call_origin(numpy.logical_not, x1, out, **kwargs)
-
-
-def logical_or(x1, x2, out=None, **kwargs):
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    else:
+        x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False)
+        if x1_desc:
+            return dpnp_logical_not(x1_desc).get_pyobj()
+    return call_origin(numpy.logical_not, x)
+
+
+def logical_or(x1,
+               x2,
+               /,
+               out=None,
+               *,
+               where=True,
+               dtype=None,
+               subok=True):
     """
     Compute the truth value of x1 OR x2 element-wise.
 
     For full documentation refer to :obj:`numpy.logical_or`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise logical comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    Input arrays are supported as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both as scalars (at least one of `x1` or `x2` should be a :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``where`` is supported only with default value ``True``.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -735,30 +918,56 @@ def logical_or(x1, x2, out=None, **kwargs):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc and not kwargs:
-    #     if out is not None:
-    #         pass
-    #     else:
-    #         return dpnp_logical_or(x1_desc, x2_desc).get_pyobj()
-
-    return call_origin(numpy.logical_or, x1, x2, out, **kwargs)
-
-
-def logical_xor(x1, x2, out=None, **kwargs):
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_logical_or(x1_desc, x2_desc).get_pyobj()
+    return call_origin(numpy.logical_or, x1, x2)
+
+
+def logical_xor(x1,
+               x2,
+               /,
+               out=None,
+               *,
+               where=True,
+               dtype=None,
+               subok=True):
     """
-    Compute the truth value of x1 XOR x2, element-wise.
+    Compute the truth value of x1 XOR x2 element-wise.
 
     For full documentation refer to :obj:`numpy.logical_xor`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise logical comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    Input arrays are supported as :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both as scalars (at least one of `x1` or `x2` should be a :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``where`` is supported only with default value ``True``.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -778,29 +987,56 @@ def logical_xor(x1, x2, out=None, **kwargs):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc and not kwargs:
-    #     if out is not None:
-    #         pass
-    #     else:
-    #         return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj()
-
-    return call_origin(numpy.logical_xor, x1, x2, out, **kwargs)
-
-
-def not_equal(x1, x2):
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj()
+    return call_origin(numpy.logical_xor, x1, x2)
+
+
+def not_equal(x1,
+              x2,
+              /,
+              out=None,
+              *,
+              where=True,
+              dtype=None,
+              subok=True):
     """
-    Return (x1 != x2) element-wise.
+    Return the truth value of (x1 != x2) element-wise.
 
     For full documentation refer to :obj:`numpy.not_equal`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array of bool type, element-wise comparison of `x1` and `x2`.
+
     Limitations
     -----------
-    At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`.
-    If either ``x1`` or ``x2`` is scalar then other one should be :obj:`dpnp.ndarray`.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both as scalars (at least one of `x1` or `x2` should be a :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    Input array data types are limited by supported DPNP :ref:`Data types`,
+    excluding `dpnp.complex64` and `dpnp.complex128`.
 
     See Also
     --------
@@ -821,16 +1057,25 @@ def not_equal(x1, x2):
 
     """
 
-    # x1_desc = dpnp.get_dpnp_descriptor(x1)
-    # x2_desc = dpnp.get_dpnp_descriptor(x2)
-    # if x1_desc and x2_desc:
-    #     if x1_desc.size < 2:
-    #         pass
-    #     elif x2_desc.size < 2:
-    #         pass
-    #     else:
-    #         result = dpnp_not_equal(x1_desc, x2_desc).get_pyobj()
-
-    #         return result
-
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_not_equal(x1_desc, x2_desc).get_pyobj()
     return call_origin(numpy.not_equal, x1, x2)
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 26b81a67dd95..feff53288cfd 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -154,58 +154,68 @@ def absolute(x1, **kwargs):
     return call_origin(numpy.absolute, x1, **kwargs)
 
 
-def add(x1, x2, dtype=None, out=None, where=True, **kwargs):
+def add(x1,
+        x2,
+        /,
+        out=None,
+        *,
+        where=True,
+        dtype=None,
+        subok=True,
+        **kwargs):
     """
     Add arguments element-wise.
 
     For full documentation refer to :obj:`numpy.add`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        The sum of `x1` and `x2`, element-wise.
+
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
-    Otherwise the functions will be executed sequentially on CPU.
+    Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> a = np.array([1, 2, 3])
-    >>> b = np.array([1, 2, 3])
-    >>> result = np.add(a, b)
-    >>> [x for x in result]
+    >>> import dpnp as dp
+    >>> a = dp.array([1, 2, 3])
+    >>> b = dp.array([1, 2, 3])
+    >>> result = dp.add(a, b)
+    >>> print(result)
     [2, 4, 6]
 
     """
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
 
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif dtype is not None:
-            pass
-        elif out is not None:
-            pass
-        elif not where:
-            pass
-        else:
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
-            return dpnp_add(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
 
-    return call_origin(numpy.add, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return call_origin(numpy.add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def around(x1, decimals=0, out=None):
@@ -534,55 +544,66 @@ def diff(x1, n=1, axis=-1, prepend=numpy._NoValue, append=numpy._NoValue):
     return call_origin(numpy.diff, x1, n=n, axis=axis, prepend=prepend, append=append)
 
 
-def divide(x1, x2, dtype=None, out=None, where=True, **kwargs):
+def divide(x1,
+           x2,
+           /,
+           out=None,
+           *,
+           where=True,
+           dtype=None,
+           subok=True,
+           **kwargs):
     """
     Divide arguments element-wise.
 
     For full documentation refer to :obj:`numpy.divide`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        The quotient ``x1/x2``, element-wise.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
-    Otherwise the functions will be executed sequentially on CPU.
+    Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> result = np.divide(np.array([1, -2, 6, -9]), np.array([-2, -2, -2, -2]))
-    >>> [x for x in result]
+    >>> import dpnp as dp
+    >>> result = dp.divide(dp.array([1, -2, 6, -9]), dp.array([-2, -2, -2, -2]))
+    >>> print(result)
     [-0.5, 1.0, -3.0, 4.5]
 
     """
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
 
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif dtype is not None:
-            pass
-        elif out is not None:
-            pass
-        elif not where:
-            pass
-        else:
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
             return dpnp_divide(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
 
-    return call_origin(numpy.divide, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return call_origin(numpy.divide, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def ediff1d(x1, to_end=None, to_begin=None):
@@ -1093,11 +1114,11 @@ def multiply(x1,
     -------
     y : {dpnp.ndarray, scalar}
         The product of `x1` and `x2`, element-wise.
-        The result is a scalar if both x1 and x2 are scalars.
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
     Otherwise the functions will be executed sequentially on CPU.
@@ -1122,18 +1143,20 @@ def multiply(x1,
     elif subok is not True:
         pass
     elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
-        # keep the result in host memory, if both inputs are scalars
-        return x1 * x2
+        # at least either x1 or x2 has to be an array
+        pass
     else:
-        # get a common queue to copy data from the host into a device if any input is scalar
-        queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
 
-        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue)
-        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue)
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
 
-    return call_origin(numpy.multiply, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return call_origin(numpy.multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def nancumprod(x1, **kwargs):
@@ -1508,60 +1531,69 @@ def sign(x1, **kwargs):
     return call_origin(numpy.sign, x1, **kwargs)
 
 
-def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs):
+def subtract(x1,
+             x2,
+             /,
+             out=None,
+             *,
+             where=True,
+             dtype=None,
+             subok=True,
+             **kwargs):
     """
     Subtract arguments, element-wise.
 
     For full documentation refer to :obj:`numpy.subtract`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        The difference of `x1` and `x2`, element-wise.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
+    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
+    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
-    Otherwise the functions will be executed sequentially on CPU.
+    Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     Example
     -------
-    >>> import dpnp as np
-    >>> result = np.subtract(np.array([4, 3]), np.array([2, 7]))
-    >>> [x for x in result]
+    >>> import dpnp as dp
+    >>> result = dp.subtract(dp.array([4, 3]), dp.array([2, 7]))
+    >>> print(result)
     [2, -4]
 
     """
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
 
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x1_desc and x1_desc.dtype == numpy.bool:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.dtype == numpy.bool:
-            pass
-        elif dtype is not None:
-            pass
-        elif out is not None:
-            pass
-        elif not where:
-            pass
-        else:
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
-            return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            if x1_desc.dtype == x2_desc.dtype == dpnp.bool:
+                raise TypeError("DPNP boolean subtract, the `-` operator, is not supported, "
+                                "use the bitwise_xor, the `^` operator, or the logical_xor function instead.")
+            return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
 
-    return call_origin(numpy.subtract, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return call_origin(numpy.subtract, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
index 27eaf4a115f5..ab92f8cc6251 100644
--- a/dpnp/dpnp_iface_statistics.py
+++ b/dpnp/dpnp_iface_statistics.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -299,7 +299,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights)
 
 
-def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
+def histogram(a, bins=10, range=None, density=None, weights=None):
     """
     Compute the histogram of a dataset.
     For full documentation refer to :obj:`numpy.histogram`.
@@ -323,7 +323,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
     1.0
     """
 
-    return call_origin(numpy.histogram, a=a, bins=bins, range=range, normed=normed, weights=weights, density=density)
+    return call_origin(numpy.histogram, a=a, bins=bins, range=range, density=density, weights=weights)
 
 
 def max(x1, axis=None, out=None, keepdims=False, initial=None, where=True):
diff --git a/dpnp/dpnp_iface_types.py b/dpnp/dpnp_iface_types.py
index dfcf599bf3be..a39cfa47cd12 100644
--- a/dpnp/dpnp_iface_types.py
+++ b/dpnp/dpnp_iface_types.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -36,12 +36,12 @@
 
 import numpy
 
+
 __all__ = [
     "bool",
     "bool_",
     "complex128",
     "complex64",
-    "default_float_type",
     "dtype",
     "float",
     "float16",
@@ -59,7 +59,7 @@
     "void"
 ]
 
-bool = numpy.bool
+bool = numpy.bool_
 bool_ = numpy.bool_
 complex128 = numpy.complex128
 complex64 = numpy.complex64
@@ -67,18 +67,14 @@
 float16 = numpy.float16
 float32 = numpy.float32
 float64 = numpy.float64
-float = numpy.float
+float = numpy.float_
 int32 = numpy.int32
 int64 = numpy.int64
 integer = numpy.integer
-int = numpy.int
+int = numpy.int_
 longcomplex = numpy.longcomplex
 
 
-def default_float_type():
-    return float64
-
-
 def isscalar(obj):
     """
     Returns True if the type of `obj` is a scalar type.
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
index 0924dae26408..db7127319bb0 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -123,7 +123,7 @@ cdef class dpnp_descriptor:
     cdef void * get_data(self)
 
 
-cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape)
+cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *
 """
 Calculate common shape from input shapes
 """
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
index c09bef8ec485..672aa19e4dcb 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -34,7 +34,7 @@ This module contains differnt helpers and utilities
 import numpy
 
 import dpctl
-import dpctl.tensor as dpt
+import dpctl.utils as dpu
 
 import dpnp.config as config
 import dpnp.dpnp_container as dpnp_container
@@ -70,7 +70,7 @@ __all__ = [
     "dpnp_descriptor",
     "get_axis_indeces",
     "get_axis_offsets",
-    "get_common_allocation_queue",
+    "get_usm_allocations",
     "_get_linear_index",
     "map_dtype_to_device",
     "normalize_axis",
@@ -163,9 +163,9 @@ def call_origin(function, *args, **kwargs):
         kwargx = convert_item(kwarg)
         kwargs_new[key] = kwargx
 
-    exec_q = dpctl.utils.get_execution_queue(alloc_queues)
+    exec_q = dpu.get_execution_queue(alloc_queues)
     if exec_q is None:
-        exec_q = sycl_queue
+        exec_q = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue)
     # print(f"DPNP call_origin(): bakend called. \n\t function={function}, \n\t args_new={args_new}, \n\t kwargs_new={kwargs_new}, \n\t dpnp_inplace={dpnp_inplace}")
     # TODO need to put array memory into NumPy call
     result_origin = function(*args_new, **kwargs_new)
@@ -220,30 +220,49 @@ def unwrap_array(x1):
     return x1
 
 
-def get_common_allocation_queue(objects):
-    """
-    Given a list of objects returns the queue which can be used for a memory allocation
-    to follow compute follows data paradigm, or returns `None` if the default queue can be used.
-    An exception will be raised, if the paradigm is broked for the given list of objects.
-    """
-    if not isinstance(objects, (list, tuple)):
-        raise TypeError("Expected a list or a tuple, got {}".format(type(objects)))
-    
-    if len(objects) == 0:
+def _get_coerced_usm_type(objects):
+    types_in_use = [obj.usm_type for obj in objects if hasattr(obj, "usm_type")]
+    if len(types_in_use) == 0:
         return None
+    elif len(types_in_use) == 1:
+        return types_in_use[0]
+
+    common_usm_type = dpu.get_coerced_usm_type(types_in_use)
+    if common_usm_type is None:
+        raise ValueError("Input arrays must have coerced USM types")
+    return common_usm_type
 
+
+def _get_common_allocation_queue(objects):
     queues_in_use = [obj.sycl_queue for obj in objects if hasattr(obj, "sycl_queue")]
     if len(queues_in_use) == 0:
         return None
     elif len(queues_in_use) == 1:
         return queues_in_use[0]
 
-    common_queue = dpt.get_execution_queue(queues_in_use)
+    common_queue = dpu.get_execution_queue(queues_in_use)
     if common_queue is None:
         raise ValueError("Input arrays must be allocated on the same SYCL queue")
     return common_queue
 
 
+def get_usm_allocations(objects):
+    """
+    Given a list of objects returns a tuple of USM type and SYCL queue
+    which can be used for a memory allocation and to follow compute follows data paradigm,
+    or returns `(None, None)` if the default USM type and SYCL queue can be used.
+    An exception will be raised if the paradigm is broken for the given list of objects.
+
+    """
+
+    if not isinstance(objects, (list, tuple)):
+        raise TypeError("Expected a list or a tuple, got {}".format(type(objects)))
+    
+    if len(objects) == 0:
+        return (None, None)
+    return (_get_coerced_usm_type(objects), _get_common_allocation_queue(objects))
+
+
 def map_dtype_to_device(dtype, device):
     """
     Map an input ``dtype`` with type ``device`` may use
@@ -399,7 +418,7 @@ cdef tuple get_shape_dtype(object input_obj):
 
             # shape and dtype does not match with siblings.
             if ((return_shape != elem_shape) or (return_dtype != elem_dtype)):
-                return (elem_shape, numpy.dtype(numpy.object))
+                return (elem_shape, numpy.dtype(numpy.object_))
 
         list_shape.push_back(len(input_obj))
         list_shape.insert(list_shape.end(), return_shape.begin(), return_shape.end())
@@ -429,7 +448,9 @@ cpdef find_common_type(object x1_obj, object x2_obj):
     return numpy.find_common_type(array_types, scalar_types)
 
 
-cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape):
+cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *:
+    cdef shape_type_c input1_shape_orig = input1_shape
+    cdef shape_type_c input2_shape_orig = input2_shape
     cdef shape_type_c result_shape
 
     # ex (8, 1, 6, 1) and (7, 1, 5) -> (8, 1, 6, 1) and (1, 7, 1, 5)
@@ -446,9 +467,9 @@ cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input
         elif input2_shape[it] == 1:
             result_shape.push_back(input1_shape[it])
         else:
-            err_msg = f"{ERROR_PREFIX} in function get_common_shape()"
-            err_msg += f"operands could not be broadcast together with shapes {input1_shape} {input2_shape}"
-            ValueError(err_msg)
+            err_msg = f"{ERROR_PREFIX} in function get_common_shape(): "
+            err_msg += f"operands could not be broadcast together with shapes {input1_shape_orig} {input2_shape_orig}"
+            raise ValueError(err_msg)
 
     return result_shape
 
@@ -629,10 +650,7 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2):
             "could not recognize common USM type for inputs of USM types {} and {}"
             "".format(array1_obj.usm_type, array2_obj.usm_type))
 
-    common_sycl_queue = dpctl.utils.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue))
-    # TODO: refactor, remove when CFD is implemented in all array constructors
-    if common_sycl_queue is None and array1_obj.sycl_context == array2_obj.sycl_context:
-        common_sycl_queue = array1_obj.sycl_queue
+    common_sycl_queue = dpu.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue))
     if common_sycl_queue is None:
         raise ValueError(
             "could not recognize common SYCL queue for inputs in SYCL queues {} and {}"
diff --git a/dpnp/random/dpnp_iface_random.py b/dpnp/random/dpnp_iface_random.py
index 677f2a7e94bc..ade85bb2fe18 100644
--- a/dpnp/random/dpnp_iface_random.py
+++ b/dpnp/random/dpnp_iface_random.py
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -98,11 +98,20 @@
 ]
 
 
-def _get_random_state():
-    global _dpnp_random_state
-    if _dpnp_random_state is None:
-        _dpnp_random_state = RandomState()
-    return  _dpnp_random_state
+def _get_random_state(device=None, sycl_queue=None):
+    global _dpnp_random_states  # module-level cache: normalized SYCL queue -> RandomState
+    # Return the RandomState cached for the normalized queue, creating and caching it on first use.
+    if not isinstance(_dpnp_random_states, dict):
+        _dpnp_random_states = dict()
+    sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue)
+    if sycl_queue not in _dpnp_random_states:
+        rs = RandomState(device=device, sycl_queue=sycl_queue)
+        if sycl_queue == rs.get_sycl_queue():  # cache only if the state really uses the normalized queue
+            _dpnp_random_states[sycl_queue] = rs
+        else:
+            raise RuntimeError("Normalized SYCL queue {} mismatched with one returned by RandomState {}"
+                               .format(sycl_queue, rs.get_sycl_queue()))
+    return _dpnp_random_states[sycl_queue]
 
 
 def beta(a, b, size=None):
@@ -774,20 +783,42 @@ def negative_binomial(n, p, size=None):
     return call_origin(numpy.random.negative_binomial, n, p, size)
 
 
-def normal(loc=0.0, scale=1.0, size=None, usm_type='device'):
+def normal(loc=0.0,
+           scale=1.0,
+           size=None,
+           device=None,
+           usm_type="device",
+           sycl_queue=None):
     """
-    Normal distribution.
-
     Draw random samples from a normal (Gaussian) distribution.
 
     For full documentation refer to :obj:`numpy.random.normal`.
 
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Drawn samples from the parameterized normal distribution.
+        Output array data type is :obj:`dpnp.float64` if device supports it,
+        or :obj:`dpnp.float32` otherwise (this function takes no `dtype` argument).
+
     Limitations
     -----------
-    Parameters ``loc`` and ``scale`` are supported as scalar.
+    Parameters `loc` and `scale` are supported as scalar.
     Otherwise, :obj:`numpy.random.normal(loc, scale, size)` samples are drawn.
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    The output data type cannot be selected: ``dtype=None`` is always passed to the random state.
 
     Examples
     --------
@@ -796,11 +827,9 @@ def normal(loc=0.0, scale=1.0, size=None, usm_type='device'):
     >>> s = dpnp.random.normal(mu, sigma, 1000)
 
     """
-    return _get_random_state().normal(loc=loc,
-                                      scale=scale,
-                                      size=size,
-                                      dtype=None,
-                                      usm_type=usm_type)
+
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.normal(loc=loc, scale=scale, size=size, dtype=None, usm_type=usm_type)
 
 
 def noncentral_chisquare(df, nonc, size=None):
@@ -986,7 +1015,11 @@ def power(a, size=None):
     return call_origin(numpy.random.power, a, size)
 
 
-def rand(d0, *dn, usm_type="device"):
+def rand(d0,
+         *dn,
+         device=None,
+         usm_type="device",
+         sycl_queue=None):
     """
     Random values in a given shape.
 
@@ -995,10 +1028,24 @@ def rand(d0, *dn, usm_type="device"):
 
     For full documentation refer to :obj:`numpy.random.rand`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Random values in a given shape.
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1012,20 +1059,48 @@ def rand(d0, *dn, usm_type="device"):
 
     """
 
-    return _get_random_state().rand(d0, *dn, usm_type=usm_type)
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.rand(d0, *dn, usm_type=usm_type)
 
 
-def randint(low, high=None, size=None, dtype=int, usm_type="device"):
+def randint(low,
+            high=None,
+            size=None,
+            dtype=int,
+            device=None,
+            usm_type="device",
+            sycl_queue=None):
     """
     Return random integers from `low` (inclusive) to `high` (exclusive).
 
     For full documentation refer to :obj:`numpy.random.randint`.
 
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        `size`-shaped array of random integers from the appropriate distribution,
+        or a single such random int if `size` is not provided.
+        Output array data type is the same as input `dtype`.
+
     Limitations
     -----------
-    Parameters ``low`` and ``high`` are supported only as scalar.
-    Parameter ``dtype`` is supported only as `int`.
-    Otherwise, :obj:`numpy.random.randint(low, high, size, dtype)` samples are drawn.
+    Parameters `low` and `high` are supported only as a scalar.
+    Parameter `dtype` is supported only as :obj:`dpnp.int32` or ``int``,
+    but ``int`` value is considered to be exactly equivalent to :obj:`dpnp.int32`.
+    Otherwise, :obj:`numpy.random.RandomState.randint(low, high, size, dtype)` samples are drawn.
 
     Examples
     --------
@@ -1041,23 +1116,39 @@ def randint(low, high=None, size=None, dtype=int, usm_type="device"):
 
     """
 
-    return _get_random_state().randint(low=low,
-                                       high=high,
-                                       size=size,
-                                       dtype=dtype,
-                                       usm_type=usm_type)
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.randint(low=low, high=high, size=size, dtype=dtype, usm_type=usm_type)
 
 
-def randn(d0, *dn, usm_type="device"):
+def randn(d0,
+          *dn,
+          device=None,
+          usm_type="device",
+          sycl_queue=None):
     """
     Return a sample (or samples) from the "standard normal" distribution.
 
     For full documentation refer to :obj:`numpy.random.randn`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        A ``(d0, d1, ..., dn)``-shaped array of floating-point samples from
+        the standard normal distribution, or a single such float if no parameters were supplied.
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1075,20 +1166,38 @@ def randn(d0, *dn, usm_type="device"):
 
     """
 
-    return _get_random_state().randn(d0, *dn, usm_type=usm_type)
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.randn(d0, *dn, usm_type=usm_type)
 
 
-def random(size=None, usm_type="device"):
+def random(size=None,
+           device=None,
+           usm_type="device",
+           sycl_queue=None):
     """
     Return random floats in the half-open interval [0.0, 1.0).
     Alias for random_sample.
 
     For full documentation refer to :obj:`numpy.random.random`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned).
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1102,20 +1211,43 @@ def random(size=None, usm_type="device"):
 
     """
 
-    return random_sample(size=size, usm_type=usm_type)
+    return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue)
 
 
-def random_integers(low, high=None, size=None, usm_type="device"):
+def random_integers(low,
+                    high=None,
+                    size=None,
+                    device=None,
+                    usm_type="device",
+                    sycl_queue=None):
     """
     Random integers between `low` and `high`, inclusive.
 
     For full documentation refer to :obj:`numpy.random.random_integers`.
 
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        `size`-shaped array of random integers from the appropriate distribution,
+        or a single such random int if `size` is not provided.
+
     Limitations
     -----------
-    Parameters ``low`` and ``high`` are supported as scalar.
-    Otherwise, :obj:`numpy.random.random_integers(low, high, size)` samples
-    are drawn.
+    Parameters `low` and `high` are supported as scalar.
+    Otherwise, :obj:`numpy.random.random_integers(low, high, size)` samples are drawn.
 
     See Also
     --------
@@ -1134,12 +1266,15 @@ def random_integers(low, high=None, size=None, usm_type="device"):
     elif not dpnp.isscalar(high):
         pass
     else:
-        return randint(low, int(high) + 1, size=size, usm_type=usm_type)
+        return randint(low, int(high) + 1, size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue)
 
     return call_origin(numpy.random.random_integers, low, high, size)
 
 
-def random_sample(size=None, usm_type="device"):
+def random_sample(size=None,
+                  device=None,
+                  usm_type="device",
+                  sycl_queue=None):
     """
     Return random floats in the half-open interval [0.0, 1.0).
 
@@ -1147,10 +1282,24 @@ def random_sample(size=None, usm_type="device"):
 
     For full documentation refer to :obj:`numpy.random.random_sample`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned).
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1164,21 +1313,38 @@ def random_sample(size=None, usm_type="device"):
 
     """
 
-    return _get_random_state().random_sample(size=size,
-                                             usm_type=usm_type)
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.random_sample(size=size, usm_type=usm_type)
 
 
-def ranf(size=None, usm_type="device"):
+def ranf(size=None,
+         device=None,
+         usm_type="device",
+         sycl_queue=None):
     """
     Return random floats in the half-open interval [0.0, 1.0).
     This is an alias of random_sample.
 
     For full documentation refer to :obj:`numpy.random.ranf`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned).
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1193,7 +1359,7 @@ def ranf(size=None, usm_type="device"):
 
     """
 
-    return random_sample(size=size, usm_type=usm_type)
+    return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue)
 
 
 def rayleigh(scale=1.0, size=None):
@@ -1230,17 +1396,34 @@ def rayleigh(scale=1.0, size=None):
     return call_origin(numpy.random.rayleigh, scale, size)
 
 
-def sample(size=None, usm_type="device"):
+def sample(size=None,
+           device=None,
+           usm_type="device",
+           sycl_queue=None):
     """
     Return random floats in the half-open interval [0.0, 1.0).
     This is an alias of random_sample.
 
     For full documentation refer to :obj:`numpy.random.sample`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned).
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1255,7 +1438,7 @@ def sample(size=None, usm_type="device"):
 
     """
 
-    return random_sample(size=size, usm_type=usm_type)
+    return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue)
 
 
 def shuffle(x1):
@@ -1283,18 +1466,35 @@ def shuffle(x1):
     return
 
 
-def seed(seed=None):
+def seed(seed=None,
+         device=None,
+         sycl_queue=None):
     """
-    Reseed a legacy mt19937 random number generator engine.
+    Reseed a legacy MT19937 random number generator engine.
+
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where an array with generated numbers will be created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for an array with generated numbers.
 
     Limitations
     -----------
-    Parameter ``seed`` is supported as a scalar.
-    Otherwise, the function will use :obj:`numpy.random.seed` on the backend
-    and will be executed on fallback backend.
+    Parameter `seed` is supported as either a scalar or an array of at most three integer scalars.
 
     """
 
+    # update the MT19937 random number generator for both RandomState and legacy functionality
+    global _dpnp_random_states
+
+    sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue)
+    _dpnp_random_states[sycl_queue] = RandomState(seed=seed, sycl_queue=sycl_queue)
+
     if not use_origin_backend(seed):
         # TODO:
         # array_like of ints for `seed`
@@ -1307,10 +1507,6 @@ def seed(seed=None):
         else:
             # TODO:
             # migrate to a single approach with RandomState class
-
-            # update a mt19937 random number for both RandomState and legacy functionality
-            global _dpnp_random_state
-            _dpnp_random_state = RandomState(seed)
             dpnp_rng_srand(seed)
 
     # always reseed numpy engine also
@@ -1405,17 +1601,34 @@ def standard_gamma(shape, size=None):
     return call_origin(numpy.random.standard_gamma, shape, size)
 
 
-def standard_normal(size=None, usm_type="device"):
-    """Standard normal distribution.
-
+def standard_normal(size=None,
+                    device=None,
+                    usm_type="device",
+                    sycl_queue=None):
+    """
     Draw samples from a standard Normal distribution (mean=0, stdev=1).
 
     For full documentation refer to :obj:`numpy.random.standard_normal`.
 
-    Limitations
-    -----------
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        A floating-point array of shape `size` of drawn samples, or a
+        single sample if `size` was not specified.
+        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
 
     Examples
     --------
@@ -1423,7 +1636,9 @@ def standard_normal(size=None, usm_type="device"):
     >>> s = dpnp.random.standard_normal(1000)
 
     """
-    return _get_random_state().standard_normal(size=size, usm_type=usm_type)
+
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.standard_normal(size=size, usm_type=usm_type)
 
 
 def standard_t(df, size=None):
@@ -1506,18 +1721,45 @@ def triangular(left, mode, right, size=None):
     return call_origin(numpy.random.triangular, left, mode, right, size)
 
 
-def uniform(low=0.0, high=1.0, size=None, usm_type='device'):
+def uniform(low=0.0,
+            high=1.0,
+            size=None,
+            device=None,
+            usm_type="device",
+            sycl_queue=None):
     """
     Draw samples from a uniform distribution.
 
+    Samples are uniformly distributed over the half-open interval [low, high) (includes low, but excludes high).
+    In other words, any value within the given interval is equally likely to be drawn by uniform.
+
     For full documentation refer to :obj:`numpy.random.uniform`.
 
+    Parameters
+    ----------
+    device : {None, string, SyclDevice, SyclQueue}, optional
+        An array API concept of device where the output array is created.
+        The `device` can be ``None`` (the default), an OneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
+        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+    usm_type : {"device", "shared", "host"}, optional
+        The type of SYCL USM allocation for the output array.
+    sycl_queue : {None, SyclQueue}, optional
+        A SYCL queue to use for output array allocation and copying.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Drawn samples from the parameterized uniform distribution.
+        Output array data type is :obj:`dpnp.float64` if device supports it,
+        or :obj:`dpnp.float32` otherwise.
+
     Limitations
     -----------
-    Parameters ``low`` and ``high`` are supported as scalar.
-    Otherwise, :obj:`numpy.random.uniform(low, high, size)` samples are drawn.
-    Output array data type is :obj:`dpnp.float64` if device supports it
-    or :obj:`dpnp.float32` otherwise.
+    Parameters `low` and `high` are supported as a scalar. Otherwise,
+    :obj:`numpy.random.uniform(low, high, size)` samples are drawn.
+    This function has no `dtype` parameter: the underlying ``RandomState.uniform`` call always uses ``dtype=None``.
 
     Examples
     --------
@@ -1530,11 +1772,9 @@ def uniform(low=0.0, high=1.0, size=None, usm_type='device'):
     :obj:`dpnp.random.random` : Floats uniformly distributed over ``[0, 1)``.
 
     """
-    return _get_random_state().uniform(low=low,
-                                       high=high,
-                                       size=size,
-                                       dtype=None,
-                                       usm_type=usm_type)
+
+    rs = _get_random_state(device=device, sycl_queue=sycl_queue)
+    return rs.uniform(low=low, high=high, size=size, dtype=None, usm_type=usm_type)
 
 
 def vonmises(mu, kappa, size=None):
@@ -1679,4 +1919,4 @@ def zipf(a, size=None):
     return call_origin(numpy.random.zipf, a, size)
 
 
-_dpnp_random_state = None
+_dpnp_random_states = {}
diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py
index 1d4648c31c47..c224553b0cff 100644
--- a/dpnp/random/dpnp_random_state.py
+++ b/dpnp/random/dpnp_random_state.py
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -76,7 +76,12 @@ class RandomState:
     """
 
     def __init__(self, seed=None, device=None, sycl_queue=None):
-        self._seed = 1 if seed is None else seed
+        if seed is None:
+            # ask NumPy to generate an array of three random integers as default seed value
+            self._seed = numpy.random.randint(low=0, high=numpy.iinfo(numpy.int32).max + 1, size=3)
+        else:
+            self._seed = seed
+
         self._sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue)
         self._sycl_device = self._sycl_queue.sycl_device
 
@@ -290,7 +295,7 @@ def rand(self, *args, usm_type="device"):
 
     def randint(self, low, high=None, size=None, dtype=int, usm_type="device"):
         """
-        Draw random integers from low (inclusive) to high (exclusive).
+        Draw random integers from `low` (inclusive) to `high` (exclusive).
 
         Return random integers from the “discrete uniform” distribution of the specified type
         in the “half-open” interval [low, high).
@@ -332,7 +337,7 @@ def randint(self, low, high=None, size=None, dtype=int, usm_type="device"):
         if not use_origin_backend(low):
             if not dpnp.isscalar(low):
                 pass
-            elif not dpnp.isscalar(high):
+            elif not (high is None or dpnp.isscalar(high)):
                 pass
             else:
                 _dtype = dpnp.int32 if dtype is int else dpnp.dtype(dtype)
diff --git a/dpnp/version.py b/dpnp/version.py
index 160e8ec963a8..f09ea3c76a75 100644
--- a/dpnp/version.py
+++ b/dpnp/version.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,6 @@
 DPNP version module
 """
 
-__version__: str = '0.11.0'
+__version__: str = '0.11.1'
 
 version: str = __version__
diff --git a/examples/example4.py b/examples/example4.py
index 0790f84d10aa..6705149d52ba 100755
--- a/examples/example4.py
+++ b/examples/example4.py
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@
 """
 for function in [numpy.sqrt, numpy.fabs, numpy.reciprocal, numpy.square, numpy.cbrt, numpy.degrees, numpy.radians]:
     print()
-    for test_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]:
+    for test_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]:
         data = numpy.array([1, 2, 3, 4], dtype=test_type)
         result = function(data)
         print(f"input:{data.dtype.name:10}: outout:{result.dtype.name:10}: name:{function.__name__}")
@@ -50,8 +50,8 @@
 """
 for function in [numpy.equal, numpy.arctan2]:
     print()
-    for input1_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]:
-        for input2_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]:
+    for input1_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]:
+        for input2_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]:
             data1 = numpy.array([1, 2, 3, 4], dtype=input1_type)
             data2 = numpy.array([11, 21, 31, 41], dtype=input2_type)
             result = function(data1, data2)
diff --git a/tests/conftest.py b/tests/conftest.py
index 78d3180bac08..22276f125f26 100755
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -77,3 +77,22 @@ def pytest_collection_modifyitems(config, items):
 @pytest.fixture
 def allow_fall_back_on_numpy(monkeypatch):
     monkeypatch.setattr(dpnp.config, '__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__', 0)
+
+@pytest.fixture
+def suppress_divide_numpy_warnings():
+    # divide: ignore division-by-zero warnings (infinite result obtained from finite numbers)
+    old_settings = numpy.seterr(divide='ignore')
+    yield
+    numpy.seterr(**old_settings)  # restore the settings saved before the test ran
+
+@pytest.fixture
+def suppress_invalid_numpy_warnings():
+    # invalid: ignore warnings about invalid floating-point operations
+    # (result is not an expressible number, typically indicates that a NaN was produced)
+    old_settings = numpy.seterr(invalid='ignore')
+    yield
+    numpy.seterr(**old_settings)  # restore the settings saved before the test ran
+
+@pytest.fixture
+def suppress_divide_invalid_numpy_warnings(suppress_divide_numpy_warnings, suppress_invalid_numpy_warnings):
+    yield  # nothing to do here: the two requested fixtures perform the setup and teardown
diff --git a/tests/helper.py b/tests/helper.py
new file mode 100644
index 000000000000..17c62cecd289
--- /dev/null
+++ b/tests/helper.py
@@ -0,0 +1,39 @@
+import dpctl
+import dpnp
+
+
+def get_all_dtypes(no_bool=False,
+                   no_float16=True,
+                   no_complex=False,
+                   no_none=False,
+                   device=None):
+    """
+    Build a list of types supported by DPNP based on input flags and device capabilities.
+    """
+
+    dev = dpctl.select_default_device() if device is None else device
+
+    # start with the boolean type unless it is excluded by `no_bool`
+    dtypes = [dpnp.bool] if not no_bool else []
+
+    # integer types are always included
+    dtypes.extend([dpnp.int32, dpnp.int64])
+
+    # float16 is added only when requested (`no_float16=False`) and the device reports the fp16 aspect
+    if not no_float16 and dev.has_aspect_fp16:
+        dtypes.append(dpnp.float16)
+
+    dtypes.append(dpnp.float32)
+    if dev.has_aspect_fp64:
+        dtypes.append(dpnp.float64)
+
+    # complex64 is added unless excluded; complex128 additionally requires the fp64 aspect
+    if not no_complex:
+        dtypes.append(dpnp.complex64)
+        if dev.has_aspect_fp64:
+            dtypes.append(dpnp.complex128)
+
+    # add None value to validate a default dtype
+    if not no_none:
+        dtypes.append(None)
+    return dtypes
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index b8b02e95bbfb..2f0334077a06 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -35,54 +35,42 @@ tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpct
 
 tests/test_arraymanipulation.py::TestHstack::test_generator
 tests/test_arraymanipulation.py::TestVstack::test_generator
+
 tests/test_dparray.py::test_astype[[]-float64-float64]
 tests/test_dparray.py::test_astype[[]-float64-float32]
 tests/test_dparray.py::test_astype[[]-float64-int64]
 tests/test_dparray.py::test_astype[[]-float64-int32]
 tests/test_dparray.py::test_astype[[]-float64-bool]
-tests/test_dparray.py::test_astype[[]-float64-bool_]
 tests/test_dparray.py::test_astype[[]-float64-complex]
 tests/test_dparray.py::test_astype[[]-float32-float64]
 tests/test_dparray.py::test_astype[[]-float32-float32]
 tests/test_dparray.py::test_astype[[]-float32-int64]
 tests/test_dparray.py::test_astype[[]-float32-int32]
 tests/test_dparray.py::test_astype[[]-float32-bool]
-tests/test_dparray.py::test_astype[[]-float32-bool_]
 tests/test_dparray.py::test_astype[[]-float32-complex]
 tests/test_dparray.py::test_astype[[]-int64-float64]
 tests/test_dparray.py::test_astype[[]-int64-float32]
 tests/test_dparray.py::test_astype[[]-int64-int64]
 tests/test_dparray.py::test_astype[[]-int64-int32]
 tests/test_dparray.py::test_astype[[]-int64-bool]
-tests/test_dparray.py::test_astype[[]-int64-bool_]
 tests/test_dparray.py::test_astype[[]-int64-complex]
 tests/test_dparray.py::test_astype[[]-int32-float64]
 tests/test_dparray.py::test_astype[[]-int32-float32]
 tests/test_dparray.py::test_astype[[]-int32-int64]
 tests/test_dparray.py::test_astype[[]-int32-int32]
 tests/test_dparray.py::test_astype[[]-int32-bool]
-tests/test_dparray.py::test_astype[[]-int32-bool_]
 tests/test_dparray.py::test_astype[[]-int32-complex]
 tests/test_dparray.py::test_astype[[]-bool-float64]
 tests/test_dparray.py::test_astype[[]-bool-float32]
 tests/test_dparray.py::test_astype[[]-bool-int64]
 tests/test_dparray.py::test_astype[[]-bool-int32]
 tests/test_dparray.py::test_astype[[]-bool-bool]
-tests/test_dparray.py::test_astype[[]-bool-bool_]
 tests/test_dparray.py::test_astype[[]-bool-complex]
-tests/test_dparray.py::test_astype[[]-bool_-float64]
-tests/test_dparray.py::test_astype[[]-bool_-float32]
-tests/test_dparray.py::test_astype[[]-bool_-int64]
-tests/test_dparray.py::test_astype[[]-bool_-int32]
-tests/test_dparray.py::test_astype[[]-bool_-bool]
-tests/test_dparray.py::test_astype[[]-bool_-bool_]
-tests/test_dparray.py::test_astype[[]-bool_-complex]
 tests/test_dparray.py::test_astype[[]-complex-float64]
 tests/test_dparray.py::test_astype[[]-complex-float32]
 tests/test_dparray.py::test_astype[[]-complex-int64]
 tests/test_dparray.py::test_astype[[]-complex-int32]
 tests/test_dparray.py::test_astype[[]-complex-bool]
-tests/test_dparray.py::test_astype[[]-complex-bool_]
 tests/test_dparray.py::test_astype[[]-complex-complex]
 
 tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]]
@@ -342,7 +330,6 @@ tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_K_strides
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_subok
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_zero_sized_array_strides
-tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_eye
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_full_like_subok
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_ones_like_subok
 tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_zeros_like_subok
@@ -402,7 +389,7 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype
-tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile
+
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid2
@@ -778,18 +765,13 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_547_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_549_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='mod', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input
+
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract
+
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2
 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 01a2bb21dc92..e6598904e16f 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -18,7 +18,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
@@ -29,11 +28,9 @@ tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumprod-data4]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumsum-data5]
-tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-diff-data6]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ediff1d-data7]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-fabs-data8]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-floor-data9]
-tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-gradient-data10]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumprod-data11]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumsum-data12]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nanprod-data13]
@@ -91,6 +88,7 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesInvalidValu
 tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_0_{shape=(3, 3)}::test_diag_indices_from
 tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_1_{shape=(0, 0)}::test_diag_indices_from
 tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_2_{shape=(2, 2, 2)}::test_diag_indices_from
+
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.],       [4., 5., 6.]]), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
@@ -103,6 +101,7 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3],       [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.],       [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3],       [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3],       [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
+
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_all
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_axis
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_sum_all
@@ -301,54 +300,42 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{extern
 tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory
 tests/test_arraymanipulation.py::TestHstack::test_generator
 tests/test_arraymanipulation.py::TestVstack::test_generator
+
 tests/test_dparray.py::test_astype[[]-float64-float64]
 tests/test_dparray.py::test_astype[[]-float64-float32]
 tests/test_dparray.py::test_astype[[]-float64-int64]
 tests/test_dparray.py::test_astype[[]-float64-int32]
 tests/test_dparray.py::test_astype[[]-float64-bool]
-tests/test_dparray.py::test_astype[[]-float64-bool_]
 tests/test_dparray.py::test_astype[[]-float64-complex]
 tests/test_dparray.py::test_astype[[]-float32-float64]
 tests/test_dparray.py::test_astype[[]-float32-float32]
 tests/test_dparray.py::test_astype[[]-float32-int64]
 tests/test_dparray.py::test_astype[[]-float32-int32]
 tests/test_dparray.py::test_astype[[]-float32-bool]
-tests/test_dparray.py::test_astype[[]-float32-bool_]
 tests/test_dparray.py::test_astype[[]-float32-complex]
 tests/test_dparray.py::test_astype[[]-int64-float64]
 tests/test_dparray.py::test_astype[[]-int64-float32]
 tests/test_dparray.py::test_astype[[]-int64-int64]
 tests/test_dparray.py::test_astype[[]-int64-int32]
 tests/test_dparray.py::test_astype[[]-int64-bool]
-tests/test_dparray.py::test_astype[[]-int64-bool_]
 tests/test_dparray.py::test_astype[[]-int64-complex]
 tests/test_dparray.py::test_astype[[]-int32-float64]
 tests/test_dparray.py::test_astype[[]-int32-float32]
 tests/test_dparray.py::test_astype[[]-int32-int64]
 tests/test_dparray.py::test_astype[[]-int32-int32]
 tests/test_dparray.py::test_astype[[]-int32-bool]
-tests/test_dparray.py::test_astype[[]-int32-bool_]
 tests/test_dparray.py::test_astype[[]-int32-complex]
 tests/test_dparray.py::test_astype[[]-bool-float64]
 tests/test_dparray.py::test_astype[[]-bool-float32]
 tests/test_dparray.py::test_astype[[]-bool-int64]
 tests/test_dparray.py::test_astype[[]-bool-int32]
 tests/test_dparray.py::test_astype[[]-bool-bool]
-tests/test_dparray.py::test_astype[[]-bool-bool_]
 tests/test_dparray.py::test_astype[[]-bool-complex]
-tests/test_dparray.py::test_astype[[]-bool_-float64]
-tests/test_dparray.py::test_astype[[]-bool_-float32]
-tests/test_dparray.py::test_astype[[]-bool_-int64]
-tests/test_dparray.py::test_astype[[]-bool_-int32]
-tests/test_dparray.py::test_astype[[]-bool_-bool]
-tests/test_dparray.py::test_astype[[]-bool_-bool_]
-tests/test_dparray.py::test_astype[[]-bool_-complex]
 tests/test_dparray.py::test_astype[[]-complex-float64]
 tests/test_dparray.py::test_astype[[]-complex-float32]
 tests/test_dparray.py::test_astype[[]-complex-int64]
 tests/test_dparray.py::test_astype[[]-complex-int32]
 tests/test_dparray.py::test_astype[[]-complex-bool]
-tests/test_dparray.py::test_astype[[]-complex-bool_]
 tests/test_dparray.py::test_astype[[]-complex-complex]
 
 tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]
@@ -577,7 +564,6 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype
-tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile
 
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1
@@ -870,7 +856,7 @@ tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_arra
 tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_length
 tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_is_equal
 tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_not_equal
-tests/third_party/cupy/logic_tests/test_comparison.py::TestComparisonOperator::test_binary_npscalar_array
+
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast_arrays
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_10_{shapes=[(0, 1, 1, 0, 3), (5, 2, 0, 1, 0, 0, 3), (2, 1, 0, 0, 0, 3)]}::test_broadcast
@@ -981,6 +967,7 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps
 tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile
 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2
 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3
+
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_279_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.],       [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_287_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.],       [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
@@ -997,19 +984,13 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0., 1., 2.],       [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3],       [4, 5, 6]]), arg2=array([[0, 1, 2],       [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input
+
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract
+
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2
 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float
diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py
index 5bb9795bbac8..63435bca11f0 100644
--- a/tests/test_arraycreation.py
+++ b/tests/test_arraycreation.py
@@ -1,4 +1,5 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 
@@ -8,24 +9,13 @@
 import numpy
 from numpy.testing import (
     assert_allclose,
+    assert_almost_equal,
     assert_array_equal,
     assert_raises
 )
 
 import tempfile
-
-
-# TODO: discuss with DPCTL why no exception on complex128
-def is_dtype_supported(dtype, no_complex_check=False):
-    device = dpctl.SyclQueue().sycl_device
-
-    if dtype is dpnp.float16 and not device.has_aspect_fp16:
-        return False
-    if dtype is dpnp.float64 and not device.has_aspect_fp64:
-        return False
-    if dtype is dpnp.complex128 and not device.has_aspect_fp64 and not no_complex_check:
-        return False
-    return True
+import operator
 
 
 @pytest.mark.parametrize("start",
@@ -37,11 +27,7 @@ def is_dtype_supported(dtype, no_complex_check=False):
 @pytest.mark.parametrize("step",
                          [None, 1, 2.7, -1.6, 100],
                          ids=['None', '1', '2.7', '-1.6', '100'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32],
-                         ids=['complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False))
 def test_arange(start, stop, step, dtype):
     rtol_mult = 2
     if numpy.issubdtype(dtype, numpy.float16):
@@ -50,26 +36,23 @@ def test_arange(start, stop, step, dtype):
 
     func = lambda xp: xp.arange(start, stop=stop, step=step, dtype=dtype)
 
-    if not is_dtype_supported(dtype):
-        if stop is None:
-            _stop, _start = start, 0
-        else:
-            _stop, _start = stop, start
-        _step = 1 if step is None else step
-
-        if _start == _stop:
-            pass
-        elif (_step < 0) ^ (_start < _stop):
-            # exception is raising when dpctl calls a kernel function,
-            # i.e. when resulting array is not empty
-            assert_raises(RuntimeError, func, dpnp)
-            return
-
     exp_array = func(numpy)
     res_array = func(dpnp).asnumpy()
 
-    if numpy.issubdtype(dtype, numpy.floating) or numpy.issubdtype(dtype, numpy.complexfloating):
-        assert_allclose(exp_array, res_array, rtol=rtol_mult*numpy.finfo(dtype).eps)
+    if dtype is None:
+        _device = dpctl.SyclQueue().sycl_device
+        if not _device.has_aspect_fp64:
+            # numpy allocated array with dtype=float64 by default,
+            # while dpnp might use float32, if float64 isn't supported by device
+            _dtype = dpnp.float32
+            rtol_mult *= 150
+        else:
+            _dtype = dpnp.float64
+    else:
+        _dtype = dtype
+
+    if numpy.issubdtype(_dtype, numpy.floating) or numpy.issubdtype(_dtype, numpy.complexfloating):
+        assert_allclose(exp_array, res_array, rtol=rtol_mult*numpy.finfo(_dtype).eps)
     else:
         assert_array_equal(exp_array, res_array)
 
@@ -101,43 +84,33 @@ def test_diag(v, k):
 
 
 @pytest.mark.parametrize("N",
-                         [0, 1, 2, 3, 4],
-                         ids=['0', '1', '2', '3', '4'])
+                         [0, 1, 2, 3],
+                         ids=['0', '1', '2', '3'])
 @pytest.mark.parametrize("M",
-                         [None, 0, 1, 2, 3, 4],
-                         ids=['None', '0', '1', '2', '3', '4'])
+                         [None, 0, 1, 2, 3],
+                         ids=['None', '0', '1', '2', '3'])
 @pytest.mark.parametrize("k",
-                         [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5],
-                         ids=['-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
-def test_eye(N, M, k, dtype):
-    expected = numpy.eye(N, M=M, k=k, dtype=dtype)
-    result = dpnp.eye(N, M=M, k=k, dtype=dtype)
-    assert_array_equal(expected, result)
+                         [-4, -3, -2, -1, 0, 1, 2, 3, 4],
+                         ids=['-4', '-3', '-2', '-1', '0', '1', '2', '3', '4'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
+@pytest.mark.parametrize("order",
+                         [None, "C", "F"],
+                         ids=['None', 'C', 'F'])
+def test_eye(N, M, k, dtype, order):
+    func = lambda xp: xp.eye(N, M, k=k, dtype=dtype, order=order)
+    assert_array_equal(func(numpy), func(dpnp))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 def test_frombuffer(dtype):
-    buffer = b'12345678'
+    buffer = b'12345678ABCDEF00'
     func = lambda xp: xp.frombuffer(buffer, dtype=dtype)
-
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, func, dpnp)
-        return
-
-    assert_array_equal(func(dpnp), func(numpy))
+    assert_allclose(func(dpnp), func(numpy))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
 def test_fromfile(dtype):
     with tempfile.TemporaryFile() as fh:
         fh.write(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08")
@@ -145,76 +118,44 @@ def test_fromfile(dtype):
 
         func = lambda xp: xp.fromfile(fh, dtype=dtype)
 
-        if not is_dtype_supported(dtype):
-            fh.seek(0)
-            # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-            assert_raises(ValueError, func, dpnp)
-            return
-
         fh.seek(0)
         np_res = func(numpy)
 
         fh.seek(0)
         dpnp_res = func(dpnp)
 
-        assert_array_equal(dpnp_res, np_res)
+        assert_almost_equal(dpnp_res, np_res)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False))
 def test_fromfunction(dtype):
     def func(x, y):
         return x * y
 
     shape = (3, 3)
     call_func = lambda xp: xp.fromfunction(func, shape=shape, dtype=dtype)
-
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, call_func, dpnp)
-        return
-
     assert_array_equal(call_func(dpnp), call_func(numpy))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 def test_fromiter(dtype):
     _iter = [1, 2, 3, 4]
     func = lambda xp: xp.fromiter(_iter, dtype=dtype)
-
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, func, dpnp)
-        return
-
     assert_array_equal(func(dpnp), func(numpy))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 def test_fromstring(dtype):
     string = "1 2 3 4"
     func = lambda xp: xp.fromstring(string, dtype=dtype, sep=' ')
-
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, func, dpnp)
-        return
-
     assert_array_equal(func(dpnp), func(numpy))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
 @pytest.mark.parametrize("num",
                          [2, 4, 8, 3, 9, 27])
 @pytest.mark.parametrize("endpoint",
@@ -225,11 +166,6 @@ def test_geomspace(dtype, num, endpoint):
 
     func = lambda xp: xp.geomspace(start, stop, num, endpoint, dtype)
 
-    if not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, func, dpnp)
-        return
-
     np_res = func(numpy)
     dpnp_res = func(dpnp)
 
@@ -244,25 +180,14 @@ def test_geomspace(dtype, num, endpoint):
 @pytest.mark.parametrize("n",
                          [0, 1, 4],
                          ids=['0', '1', '4'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32,
-                          numpy.bool, numpy.complex64, numpy.complex128, None],
-                         ids=['float64', 'float32', 'int64', 'int32',
-                              'bool', 'complex64', 'complex128', 'None'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
 def test_identity(n, dtype):
     func = lambda xp: xp.identity(n, dtype=dtype)
-
-    if n > 0 and not is_dtype_supported(dtype):
-        assert_raises(RuntimeError, func, dpnp)
-        return
-
     assert_array_equal(func(numpy), func(dpnp))
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 def test_loadtxt(dtype):
     func = lambda xp: xp.loadtxt(fh, dtype=dtype)
 
@@ -270,12 +195,6 @@ def test_loadtxt(dtype):
         fh.write(b"1 2 3 4")
         fh.flush()
 
-        if not is_dtype_supported(dtype):
-            # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-            fh.seek(0)
-            assert_raises(ValueError, func, dpnp)
-            return
-
         fh.seek(0)
         np_res = func(numpy)
         fh.seek(0)
@@ -284,12 +203,8 @@ def test_loadtxt(dtype):
         assert_array_equal(dpnp_res, np_res)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, None],
-                         ids=['float64', 'float32', 'int64', 'int32', 'None'])
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("offset",
                          [0, 1],
                          ids=['0', '1'])
@@ -317,21 +232,9 @@ def test_trace(array, offset, type, dtype):
     create_array = lambda xp: xp.array(array, type)
     trace_func = lambda xp, x: xp.trace(x, offset=offset, dtype=dtype)
 
-    if not is_dtype_supported(type):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, create_array, dpnp)
-        return
-
     a = create_array(numpy)
     ia = create_array(dpnp)
-
-    if not is_dtype_supported(dtype):
-        assert_raises(RuntimeError, trace_func, dpnp, ia)
-        return
-
-    expected = trace_func(numpy, a)
-    result = trace_func(dpnp, ia)
-    assert_array_equal(expected, result)
+    assert_array_equal(trace_func(dpnp, ia), trace_func(numpy, a))
 
 
 @pytest.mark.parametrize("N",
@@ -343,16 +246,9 @@ def test_trace(array, offset, type, dtype):
 @pytest.mark.parametrize("k",
                          [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5],
                          ids=['-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, float, numpy.int64, numpy.int32, numpy.int, numpy.float, int],
-                         ids=['float64', 'float32', 'numpy.float', 'float', 'int64', 'int32', 'numpy.int', 'int'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_tri(N, M, k, dtype):
     func = lambda xp: xp.tri(N, M, k, dtype=dtype)
-
-    if M > 0 and N > 0 and not is_dtype_supported(dtype):
-        assert_raises(RuntimeError, func, dpnp)
-        return
-
     assert_array_equal(func(dpnp), func(numpy))
 
 
@@ -363,48 +259,50 @@ def test_tri_default_dtype():
 
 
 @pytest.mark.parametrize("k",
-                         [-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6],
-                         ids=['-6', '-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5', '6'])
+                         [-3, -2, -1, 0, 1, 2, 3, 4, 5,
+                          numpy.array(1), dpnp.array(2), dpt.asarray(3)],
+                         ids=['-3', '-2', '-1', '0', '1', '2', '3', '4', '5',
+                              'np.array(1)', 'dpnp.array(2)', 'dpt.asarray(3)'])
 @pytest.mark.parametrize("m",
-                         [[0, 1, 2, 3, 4],
-                          [1, 1, 1, 1, 1],
-                          [[0, 0], [0, 0]],
+                         [[[0, 0], [0, 0]],
                           [[1, 2], [1, 2]],
                           [[1, 2], [3, 4]],
                           [[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                           [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]],
-                         ids=['[0, 1, 2, 3, 4]',
-                              '[1, 1, 1, 1, 1]',
-                              '[[0, 0], [0, 0]]',
+                         ids=['[[0, 0], [0, 0]]',
                               '[[1, 2], [1, 2]]',
                               '[[1, 2], [3, 4]]',
                               '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]',
                               '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]'])
-def test_tril(m, k):
-    a = numpy.array(m)
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
+def test_tril(m, k, dtype):
+    a = numpy.array(m, dtype=dtype)
     ia = dpnp.array(a)
-    expected = numpy.tril(a, k)
-    result = dpnp.tril(ia, k)
+    expected = numpy.tril(a, k=k)
+    result = dpnp.tril(ia, k=k)
     assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("k",
-                         [-4, -3, -2, -1, 0, 1, 2, 3, 4],
-                         ids=['-4', '-3', '-2', '-1', '0', '1', '2', '3', '4'])
+                         [-3, -2, -1, 0, 1, 2, 3, 4, 5,
+                          numpy.array(1), dpnp.array(2), dpt.asarray(3)],
+                         ids=['-3', '-2', '-1', '0', '1', '2', '3', '4', '5',
+                              'np.array(1)', 'dpnp.array(2)', 'dpt.asarray(3)'])
 @pytest.mark.parametrize("m",
-                         [[0, 1, 2, 3, 4],
-                          [[1, 2], [3, 4]],
+                         [[[1, 2], [3, 4]],
                           [[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                           [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]],
-                         ids=['[0, 1, 2, 3, 4]',
-                              '[[1, 2], [3, 4]]',
+                         ids=['[[1, 2], [3, 4]]',
                               '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]',
                               '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]'])
-def test_triu(m, k):
-    a = numpy.array(m)
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
+def test_triu(m, k, dtype):
+    a = numpy.array(m, dtype=dtype)
     ia = dpnp.array(a)
-    expected = numpy.triu(a, k)
-    result = dpnp.triu(ia, k)
+    expected = numpy.triu(a, k=k)
+    result = dpnp.triu(ia, k=k)
     assert_array_equal(expected, result)
 
 
@@ -414,8 +312,8 @@ def test_triu(m, k):
 def test_triu_size_null(k):
     a = numpy.ones(shape=(1, 2, 0))
     ia = dpnp.array(a)
-    expected = numpy.triu(a, k)
-    result = dpnp.triu(ia, k)
+    expected = numpy.triu(a, k=k)
+    result = dpnp.triu(ia, k=k)
     assert_array_equal(expected, result)
 
 
@@ -426,11 +324,7 @@ def test_triu_size_null(k):
                          ids=['[1, 2, 3, 4]',
                               '[]',
                               '[0, 3, 5]'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32,
-                          numpy.bool, numpy.complex64, numpy.complex128],
-                         ids=['float64', 'float32', 'int64', 'int32',
-                              'bool', 'complex64', 'complex128'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
 @pytest.mark.parametrize("n",
                          [0, 1, 4, None],
                          ids=['0', '1', '4', 'None'])
@@ -441,18 +335,8 @@ def test_vander(array, dtype, n, increase):
     create_array = lambda xp: xp.array(array, dtype=dtype)
     vander_func = lambda xp, x: xp.vander(x, N=n, increasing=increase)
 
-    if array and not is_dtype_supported(dtype):
-        # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead
-        assert_raises(ValueError, create_array, dpnp)
-        return
-
     a_np = numpy.array(array, dtype=dtype)
     a_dpnp = dpnp.array(array, dtype=dtype)
-
-    if array and not is_dtype_supported(dtype):
-        assert_raises(RuntimeError, vander_func, dpnp, a_dpnp)
-        return
-
     assert_array_equal(vander_func(numpy, a_np), vander_func(dpnp, a_dpnp))
 
 
@@ -462,21 +346,12 @@ def test_vander(array, dtype, n, increase):
 @pytest.mark.parametrize("fill_value",
                          [1.5, 2, 1.5+0.j],
                          ids=['1.5', '2', '1.5+0.j'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_full(shape, fill_value, dtype, order):
     func = lambda xp: xp.full(shape, fill_value, dtype=dtype, order=order)
-
-    if shape != 0 and not 0 in shape and not is_dtype_supported(dtype, no_complex_check=True):
-        assert_raises(RuntimeError, func, dpnp)
-        return
-
     assert_array_equal(func(numpy), func(dpnp))
 
 
@@ -486,23 +361,15 @@ def test_full(shape, fill_value, dtype, order):
 @pytest.mark.parametrize("fill_value",
                          [1.5, 2, 1.5+0.j],
                          ids=['1.5', '2', '1.5+0.j'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_full_like(array, fill_value, dtype, order):
-    a = numpy.array(array)
-    ia = dpnp.array(array)
     func = lambda xp, x: xp.full_like(x, fill_value, dtype=dtype, order=order)
 
-    if ia.size and not is_dtype_supported(dtype, no_complex_check=True):
-        assert_raises(RuntimeError, func, dpnp, ia)
-        return
-    
+    a = numpy.array(array)
+    ia = dpnp.array(array)
     assert_array_equal(func(numpy, a), func(dpnp, ia))
 
 
@@ -534,7 +401,9 @@ def test_full_strides():
     assert_array_equal(dpnp.asnumpy(ia), a)
 
 
-@pytest.mark.parametrize("fill_value", [[], (), dpnp.full(0, 0)], ids=['[]', '()', 'dpnp.full(0, 0)'])
+@pytest.mark.parametrize("fill_value",
+                         [[], (), dpnp.full(0, 0)],
+                         ids=['[]', '()', 'dpnp.full(0, 0)'])
 def test_full_invalid_fill_value(fill_value):
     with pytest.raises(ValueError):
         dpnp.full(10, fill_value=fill_value)
@@ -543,120 +412,102 @@ def test_full_invalid_fill_value(fill_value):
 @pytest.mark.parametrize("shape",
                          [(), 0, (0,), (2, 0, 3), (3, 2)],
                          ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_zeros(shape, dtype, order):
-    expected = numpy.zeros(shape, dtype=dtype, order=order)
-    result = dpnp.zeros(shape, dtype=dtype, order=order)
-
-    assert_array_equal(expected, result)
+    func = lambda xp: xp.zeros(shape, dtype=dtype, order=order)
+    assert_array_equal(func(numpy), func(dpnp))
 
 
 @pytest.mark.parametrize("array",
                          [[], 0,  [1, 2, 3], [[1, 2], [3, 4]]],
                          ids=['[]', '0',  '[1, 2, 3]', '[[1, 2], [3, 4]]'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_zeros_like(array, dtype, order):
+    func = lambda xp, x: xp.zeros_like(x, dtype=dtype, order=order)
+
     a = numpy.array(array)
     ia = dpnp.array(array)
-
-    expected = numpy.zeros_like(a, dtype=dtype, order=order)
-    result = dpnp.zeros_like(ia, dtype=dtype, order=order)
-
-    assert_array_equal(expected, result)
+    assert_array_equal(func(numpy, a), func(dpnp, ia))
 
 
 @pytest.mark.parametrize("shape",
                          [(), 0, (0,), (2, 0, 3), (3, 2)],
                          ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_empty(shape, dtype, order):
-    expected = numpy.empty(shape, dtype=dtype, order=order)
-    result = dpnp.empty(shape, dtype=dtype, order=order)
-
-    assert expected.shape == result.shape
+    func = lambda xp: xp.empty(shape, dtype=dtype, order=order)
+    assert func(numpy).shape == func(dpnp).shape
 
 
 @pytest.mark.parametrize("array",
                          [[], 0,  [1, 2, 3], [[1, 2], [3, 4]]],
                          ids=['[]', '0',  '[1, 2, 3]', '[[1, 2], [3, 4]]'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32,
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                              'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_empty_like(array, dtype, order):
+    func = lambda xp, x: xp.empty_like(x, dtype=dtype, order=order)
+
     a = numpy.array(array)
     ia = dpnp.array(array)
-
-    expected = numpy.empty_like(a, dtype=dtype, order=order)
-    result = dpnp.empty_like(ia, dtype=dtype, order=order)
-
-    assert expected.shape == result.shape
+    assert func(numpy, a).shape == func(dpnp, ia).shape
 
 
 @pytest.mark.parametrize("shape",
                          [(), 0, (0,), (2, 0, 3), (3, 2)],
                          ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, 
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                         'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_ones(shape, dtype, order):
     func = lambda xp: xp.ones(shape, dtype=dtype, order=order)
-
-    if shape != 0 and not 0 in shape and not is_dtype_supported(dtype, no_complex_check=True):
-        assert_raises(RuntimeError, func, dpnp)
-        return
-
     assert_array_equal(func(numpy), func(dpnp))
 
 
 @pytest.mark.parametrize("array",
                          [[], 0,  [1, 2, 3], [[1, 2], [3, 4]]],
                          ids=['[]', '0',  '[1, 2, 3]', '[[1, 2], [3, 4]]'])
-@pytest.mark.parametrize("dtype",
-                         [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, 
-                          numpy.float16, numpy.int64, numpy.int32, numpy.bool],
-                         ids=['None', 'complex128', 'complex64', 'float64', 'float32',
-                         'float16', 'int64', 'int32', 'bool'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
 @pytest.mark.parametrize("order",
                          [None, "C", "F"],
                          ids=['None', 'C', 'F'])
 def test_ones_like(array, dtype, order):
+    func = lambda xp, x: xp.ones_like(x, dtype=dtype, order=order)
+
     a = numpy.array(array)
     ia = dpnp.array(array)
-    func = lambda xp, x: xp.ones_like(x, dtype=dtype, order=order)
+    assert_array_equal(func(numpy, a), func(dpnp, ia))
 
-    if ia.size and not is_dtype_supported(dtype, no_complex_check=True):
-        assert_raises(RuntimeError, func, dpnp, ia)
-        return
 
-    assert_array_equal(func(numpy, a), func(dpnp, ia))
+@pytest.mark.parametrize(
+    "func, args",
+    [
+        pytest.param("full_like", ['x0', '4']),
+        pytest.param("zeros_like", ['x0']),
+        pytest.param("ones_like", ['x0']),
+        pytest.param("empty_like", ['x0']),
+    ])
+def test_dpctl_tensor_input(func, args):
+    """Compare dpnp and dpctl.tensor *_like results for a dpctl.tensor input."""
+    x0 = dpt.reshape(dpt.arange(9), (3, 3))
+    # `args` holds expression strings taken from the parametrization above
+    # (trusted literals); each one is evaluated with `x0` bound to the input.
+    new_args = [eval(val, {'x0': x0}) for val in args]
+    X = getattr(dpt, func)(*new_args)
+    Y = getattr(dpnp, func)(*new_args)
+    # empty_like does not initialize its result, so only shapes are comparable
+    if func == 'empty_like':
+        assert X.shape == Y.shape
+    else:
+        assert_array_equal(X, Y)
diff --git a/tests/test_dparray.py b/tests/test_dparray.py
index 6ff1672b853b..62a0120f8a33 100644
--- a/tests/test_dparray.py
+++ b/tests/test_dparray.py
@@ -1,15 +1,17 @@
-import dpnp
-import numpy
 import pytest
+from .helper import get_all_dtypes
+
+import dpnp
 import dpctl.tensor as dpt
 
+import numpy
+from numpy.testing import (
+    assert_array_equal
+)
+
 
-@pytest.mark.parametrize("res_dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex'])
-@pytest.mark.parametrize("arr_dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex'])
+@pytest.mark.parametrize("res_dtype", get_all_dtypes())
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes())
 @pytest.mark.parametrize("arr",
                          [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []],
                          ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]'])
@@ -18,12 +20,10 @@ def test_astype(arr, arr_dtype, res_dtype):
     dpnp_array = dpnp.array(numpy_array)
     expected = numpy_array.astype(res_dtype)
     result = dpnp_array.astype(res_dtype)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
-@pytest.mark.parametrize("arr_dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex'])
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes())
 @pytest.mark.parametrize("arr",
                          [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []],
                          ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]'])
@@ -32,7 +32,7 @@ def test_flatten(arr, arr_dtype):
     dpnp_array = dpnp.array(arr, dtype=arr_dtype)
     expected = numpy_array.flatten()
     result = dpnp_array.flatten()
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("shape",
@@ -51,7 +51,7 @@ def test_flags(shape, order):
 
 
 @pytest.mark.parametrize("dtype",
-                         [numpy.complex64, numpy.float32, numpy.int64, numpy.int32, numpy.bool],
+                         [numpy.complex64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_],
                          ids=['complex64', 'float32', 'int64', 'int32', 'bool'])
 @pytest.mark.parametrize("strides",
                          [(1, 4) , (4, 1)],
@@ -68,3 +68,134 @@ def test_flags_strides(dtype, order, strides):
     assert usm_array.flags == dpnp_array.flags
     assert numpy_array.flags.c_contiguous == dpnp_array.flags.c_contiguous
     assert numpy_array.flags.f_contiguous == dpnp_array.flags.f_contiguous
+
+def test_print_dpnp_int():
+    """Compare repr() and str() of integer dpnp arrays with fixed strings."""
+
+    # dtype passed as the 'i4' type string
+    values = [1, 0, 2, -3, -1, 2, 21, -9]
+    x = dpnp.array(values, dtype='i4')
+    expected_repr = "array([ 1,  0,  2, -3, -1,  2, 21, -9], dtype=int32)"
+    expected_str = "[ 1  0  2 -3 -1  2 21 -9]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+    # int32
+    x = dpnp.array([1, -1, 21], dtype=dpnp.int32)
+    expected_repr = "array([ 1, -1, 21], dtype=int32)"
+    expected_str = "[ 1 -1 21]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+    # uint8
+    x = dpnp.array([1, 0, 3], dtype=numpy.uint8)
+    expected_repr = "array([1, 0, 3], dtype=uint8)"
+    expected_str = "[1 0 3]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+def test_print_dpnp_float():
+    """Compare repr() and str() of floating-point dpnp arrays with fixed strings."""
+
+    # Python float
+    x = dpnp.array([1, -1, 21], dtype=float)
+    expected_repr = "array([ 1., -1., 21.])"
+    expected_str = "[ 1. -1. 21.]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+    # float32
+    x = dpnp.array([1, -1, 21], dtype=dpnp.float32)
+    expected_repr = "array([ 1., -1., 21.], dtype=float32)"
+    expected_str = "[ 1. -1. 21.]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+def test_print_dpnp_complex():
+    """Compare repr() and str() of a complex dpnp array with fixed strings."""
+
+    x = dpnp.array([1, -1, 21], dtype=complex)
+    expected_repr = "array([ 1.+0.j, -1.+0.j, 21.+0.j])"
+    expected_str = "[ 1.+0.j -1.+0.j 21.+0.j]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+def test_print_dpnp_boolean():
+    """Compare repr() and str() of a boolean dpnp array with fixed strings."""
+
+    x = dpnp.array([1, 0, 3], dtype=bool)
+    expected_repr = "array([ True, False,  True])"
+    expected_str = "[ True False  True]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+def test_print_dpnp_special_character():
+    """
+    Compare repr() and str() of dpnp arrays holding NaN or inf with fixed strings.
+    """
+    # NaN
+    x = dpnp.array([1., 0., dpnp.nan, 3.])
+    expected_repr = "array([ 1.,  0., nan,  3.])"
+    expected_str = "[ 1.  0. nan  3.]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+    # inf
+    x = dpnp.array([1., 0., numpy.inf, 3.])
+    expected_repr = "array([ 1.,  0., inf,  3.])"
+    expected_str = "[ 1.  0. inf  3.]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+def test_print_dpnp_nd():
+    """
+    Compare repr() and str() of 1-D, 2-D and zero-size dpnp arrays with fixed strings.
+    """
+
+    # 1D
+    size = 10000
+    x = dpnp.arange(size, dtype='float32')
+    expected_repr = "array([0.000e+00, 1.000e+00, 2.000e+00, ..., 9.997e+03, 9.998e+03,\n       9.999e+03], dtype=float32)"
+    expected_str = "[0.000e+00 1.000e+00 2.000e+00 ... 9.997e+03 9.998e+03 9.999e+03]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+    # 2D: repr() is checked on a float array, str() on a default-dtype array
+    x_float = dpnp.array([[1, 2], [3, 4]], dtype=float)
+    x_default = dpnp.array([[1, 2], [3, 4]])
+    expected_repr = "array([[1., 2.],\n       [3., 4.]])"
+    expected_str = "[[1 2]\n [3 4]]"
+    assert repr(x_float) == expected_repr
+    assert str(x_default) == expected_str
+
+    # 0 shape
+    x = dpnp.empty(shape=(0, 0))
+    expected_repr = "array([])"
+    expected_str = "[]"
+    assert repr(x) == expected_repr
+    assert str(x) == expected_str
+
+@pytest.mark.parametrize("func", [bool, float, int, complex])
+@pytest.mark.parametrize("shape", [tuple(), (1,), (1, 1), (1, 1, 1)])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False, no_complex=True))
+def test_scalar_type_casting(func, shape, dtype):
+    """Python scalar conversion of a one-element dpnp array must match NumPy."""
+    make = lambda xp: xp.full(shape, 5, dtype=dtype)
+    assert func(make(numpy)) == func(make(dpnp))
+
+
+@pytest.mark.parametrize("method", ["__bool__", "__float__", "__int__", "__complex__"])
+@pytest.mark.parametrize("shape", [tuple(), (1,), (1, 1), (1, 1, 1)])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False, no_complex=True, no_none=True))
+def test_scalar_type_casting_by_method(method, shape, dtype):
+    """Calling a scalar-conversion dunder on a one-element dpnp array must match NumPy."""
+    make = lambda xp: xp.full(shape, 4.7, dtype=dtype)
+    assert getattr(make(numpy), method)() == getattr(make(dpnp), method)()
+
+
+@pytest.mark.parametrize("shape", [(1,), (1, 1), (1, 1, 1)])
+@pytest.mark.parametrize("index_dtype", [dpnp.int32, dpnp.int64])
+def test_array_as_index(shape, index_dtype):
+    ind_arr = dpnp.ones(shape, dtype=index_dtype)  # single-element array filled with 1
+    a = numpy.arange(ind_arr.size + 1)
+    assert a[tuple(ind_arr)] == a[1]  # a dpnp array used as a NumPy index selects a[1]
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 091cf1345c4e..1a40777afac8 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -3,6 +3,9 @@
 import dpnp
 
 import numpy
+from numpy.testing import (
+    assert_array_equal
+)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -16,7 +19,7 @@ def test_choose():
 
     expected = numpy.choose([0, 0, 0, 0], [a, b, c])
     result = dpnp.choose([0, 0, 0, 0], [ia, ib, ic])
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("offset",
@@ -47,7 +50,7 @@ def test_diagonal(array, offset):
     ia = dpnp.array(a)
     expected = numpy.diagonal(a, offset)
     result = dpnp.diagonal(ia, offset)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("val",
@@ -71,7 +74,7 @@ def test_fill_diagonal(array, val):
     ia = dpnp.array(a)
     expected = numpy.fill_diagonal(a, val)
     result = dpnp.fill_diagonal(ia, val)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("dimension",
@@ -81,7 +84,7 @@ def test_fill_diagonal(array, val):
 def test_indices(dimension):
     expected = numpy.indices(dimension)
     result = dpnp.indices(dimension)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("array",
@@ -107,7 +110,7 @@ def test_nonzero(array):
     ia = dpnp.array(array)
     expected = numpy.nonzero(a)
     result = dpnp.nonzero(ia)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -137,7 +140,7 @@ def test_place1(arr, mask, vals):
     im = dpnp.array(m)
     numpy.place(a, m, vals)
     dpnp.place(ia, im, vals)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -161,7 +164,7 @@ def test_place2(arr, mask, vals):
     im = dpnp.array(m)
     numpy.place(a, m, vals)
     dpnp.place(ia, im, vals)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -186,7 +189,7 @@ def test_place3(arr, mask, vals):
     im = dpnp.array(m)
     numpy.place(a, m, vals)
     dpnp.place(ia, im, vals)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.parametrize("v",
@@ -211,7 +214,7 @@ def test_put(array, ind, v):
     ia = dpnp.array(a)
     numpy.put(a, ind, v)
     dpnp.put(ia, ind, v)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.parametrize("v",
@@ -236,7 +239,7 @@ def test_put2(array, ind, v):
     ia = dpnp.array(a)
     numpy.put(a, ind, v)
     dpnp.put(ia, ind, v)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 def test_put3():
@@ -244,7 +247,7 @@ def test_put3():
     ia = dpnp.array(a)
     dpnp.put(ia, [0, 2], [-44, -55])
     numpy.put(a, [0, 2], [-44, -55])
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -256,7 +259,7 @@ def test_put_along_axis_val_int():
     for axis in range(2):
         numpy.put_along_axis(a, ind_r, 777, axis)
         dpnp.put_along_axis(ai, ind_r_i, 777, axis)
-        numpy.testing.assert_array_equal(a, ai)
+        assert_array_equal(a, ai)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -268,7 +271,7 @@ def test_put_along_axis1():
     for axis in range(3):
         numpy.put_along_axis(a, ind_r, 777, axis)
         dpnp.put_along_axis(ai, ind_r_i, 777, axis)
-        numpy.testing.assert_array_equal(a, ai)
+        assert_array_equal(a, ai)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -280,7 +283,7 @@ def test_put_along_axis2():
     for axis in range(3):
         numpy.put_along_axis(a, ind_r, [100, 200, 300, 400], axis)
         dpnp.put_along_axis(ai, ind_r_i, [100, 200, 300, 400], axis)
-        numpy.testing.assert_array_equal(a, ai)
+        assert_array_equal(a, ai)
 
 
 @pytest.mark.parametrize("vals",
@@ -309,7 +312,7 @@ def test_putmask1(arr, mask, vals):
     iv = dpnp.array(v)
     numpy.putmask(a, m, v)
     dpnp.putmask(ia, im, iv)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.parametrize("vals",
@@ -334,7 +337,7 @@ def test_putmask2(arr, mask, vals):
     iv = dpnp.array(v)
     numpy.putmask(a, m, v)
     dpnp.putmask(ia, im, iv)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 @pytest.mark.parametrize("vals",
@@ -360,7 +363,7 @@ def test_putmask3(arr, mask, vals):
     iv = dpnp.array(v)
     numpy.putmask(a, m, v)
     dpnp.putmask(ia, im, iv)
-    numpy.testing.assert_array_equal(a, ia)
+    assert_array_equal(a, ia)
 
 
 def test_select():
@@ -378,7 +381,7 @@ def test_select():
     ichoicelist = [ichoice_val1, ichoice_val2]
     expected = numpy.select(condlist, choicelist)
     result = dpnp.select(icondlist, ichoicelist)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("array_type",
@@ -415,10 +418,9 @@ def test_take(array, indices, array_type, indices_type):
     iind = dpnp.array(ind)
     expected = numpy.take(a, ind)
     result = dpnp.take(ia, iind)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_take_along_axis():
     a = numpy.arange(16).reshape(4, 4)
     ai = dpnp.array(a)
@@ -427,10 +429,9 @@ def test_take_along_axis():
     for axis in range(2):
         expected = numpy.take_along_axis(a, ind_r, axis)
         result = dpnp.take_along_axis(ai, ind_r_i, axis)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_take_along_axis1():
     a = numpy.arange(64).reshape(4, 4, 4)
     ai = dpnp.array(a)
@@ -439,7 +440,7 @@ def test_take_along_axis1():
     for axis in range(3):
         expected = numpy.take_along_axis(a, ind_r, axis)
         result = dpnp.take_along_axis(ai, ind_r_i, axis)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("m",
@@ -454,7 +455,7 @@ def test_take_along_axis1():
 def test_tril_indices(n, k, m):
     result = dpnp.tril_indices(n, k, m)
     expected = numpy.tril_indices(n, k, m)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("k",
@@ -472,7 +473,7 @@ def test_tril_indices_from(array, k):
     ia = dpnp.array(a)
     result = dpnp.tril_indices_from(ia, k)
     expected = numpy.tril_indices_from(a, k)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("m",
@@ -487,7 +488,7 @@ def test_tril_indices_from(array, k):
 def test_triu_indices(n, k, m):
     result = dpnp.triu_indices(n, k, m)
     expected = numpy.triu_indices(n, k, m)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("k",
@@ -505,4 +506,4 @@ def test_triu_indices_from(array, k):
     ia = dpnp.array(a)
     result = dpnp.triu_indices_from(ia, k)
     expected = numpy.triu_indices_from(a, k)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
diff --git a/tests/test_linalg.py b/tests/test_linalg.py
index dd89a18adbd6..d9784a41558f 100644
--- a/tests/test_linalg.py
+++ b/tests/test_linalg.py
@@ -1,9 +1,15 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp as inp
 
 import dpctl
+
 import numpy
+from numpy.testing import (
+    assert_allclose,
+    assert_array_equal
+)
 
 
 def vvsort(val, vec, size, xp):
@@ -49,7 +55,7 @@ def test_cholesky(array):
     ia = inp.array(a)
     result = inp.linalg.cholesky(ia)
     expected = numpy.linalg.cholesky(a)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("arr",
@@ -63,7 +69,7 @@ def test_cond(arr, p):
     ia = inp.array(a)
     result = inp.linalg.cond(ia, p)
     expected = numpy.linalg.cond(a, p)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("array",
@@ -82,13 +88,11 @@ def test_det(array):
     ia = inp.array(a)
     result = inp.linalg.det(ia)
     expected = numpy.linalg.det(a)
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("size",
                          [2, 4, 8, 16, 300])
 def test_eig_arange(type, size):
@@ -115,21 +119,19 @@ def test_eig_arange(type, size):
         if np_vec[0, i] * dpnp_vec[0, i] < 0:
             np_vec[:, i] = -np_vec[:, i]
 
-    numpy.testing.assert_array_equal(symm_orig, symm)
-    numpy.testing.assert_array_equal(dpnp_symm_orig, dpnp_symm)
+    assert_array_equal(symm_orig, symm)
+    assert_array_equal(dpnp_symm_orig, dpnp_symm)
 
     assert (dpnp_val.dtype == np_val.dtype)
     assert (dpnp_vec.dtype == np_vec.dtype)
     assert (dpnp_val.shape == np_val.shape)
     assert (dpnp_vec.shape == np_vec.shape)
 
-    numpy.testing.assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05)
-    numpy.testing.assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05)
+    assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05)
+    assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 def test_eigvals(type):
     if dpctl.get_current_device_type() != dpctl.device_type.gpu:
         pytest.skip("eigvals function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005")
@@ -144,12 +146,10 @@ def test_eigvals(type):
         ia = inp.array(a)
         result = inp.linalg.eigvals(ia)
         expected = numpy.linalg.eigvals(a)
-        numpy.testing.assert_allclose(expected, result, atol=0.5)
+        assert_allclose(expected, result, atol=0.5)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("array",
                          [[[1., 2.], [3., 4.]], [[0, 1, 2], [3, 2, -1], [4, -2, 3]]],
                          ids=['[[1., 2.], [3., 4.]]', '[[0, 1, 2], [3, 2, -1], [4, -2, 3]]'])
@@ -158,12 +158,10 @@ def test_inv(type, array):
     ia = inp.array(a)
     result = inp.linalg.inv(ia)
     expected = numpy.linalg.inv(a)
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
 @pytest.mark.parametrize("array",
                          [[0, 0], [0, 1], [1, 2], [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]]],
                          ids=['[0, 0]', '[0, 1]', '[1, 2]', '[[0, 0], [0, 0]]', '[[1, 2], [1, 2]]', '[[1, 2], [3, 4]]'])
@@ -177,10 +175,11 @@ def test_matrix_rank(type, tol, array):
     result = inp.linalg.matrix_rank(ia, tol=tol)
     expected = numpy.linalg.matrix_rank(a, tol=tol)
 
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+@pytest.mark.usefixtures("suppress_divide_numpy_warnings")
 @pytest.mark.parametrize("array",
                          [[7], [1, 2], [1, 0]],
                          ids=['[7]', '[1, 2]', '[1, 0]'])
@@ -195,7 +194,7 @@ def test_norm1(array, ord, axis):
     ia = inp.array(a)
     result = inp.linalg.norm(ia, ord=ord, axis=axis)
     expected = numpy.linalg.norm(a, ord=ord, axis=axis)
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -213,7 +212,7 @@ def test_norm2(array, ord, axis):
     ia = inp.array(a)
     result = inp.linalg.norm(ia, ord=ord, axis=axis)
     expected = numpy.linalg.norm(a, ord=ord, axis=axis)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -231,13 +230,11 @@ def test_norm3(array, ord, axis):
     ia = inp.array(a)
     result = inp.linalg.norm(ia, ord=ord, axis=axis)
     expected = numpy.linalg.norm(a, ord=ord, axis=axis)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(2, 2), (3, 4), (5, 3), (16, 16)],
                          ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)'])
@@ -262,7 +259,7 @@ def test_qr(type, shape, mode):
         tol = 1e-11
 
     # check decomposition
-    numpy.testing.assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol)
+    assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol)
 
     # NP change sign for comparison
     ncols = min(a.shape[0], a.shape[1])
@@ -273,15 +270,12 @@ def test_qr(type, shape, mode):
             np_r[i, :] = -np_r[i, :]
 
         if numpy.any(numpy.abs(np_r[i, :]) > tol):
-            numpy.testing.assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol)
+            assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol)
 
-    numpy.testing.assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol)
+    assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(2, 2), (3, 4), (5, 3), (16, 16)],
                          ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)'])
@@ -310,10 +304,10 @@ def test_svd(type, shape):
         dpnp_diag_s[i, i] = dpnp_s[i]
 
     # check decomposition
-    numpy.testing.assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol)
+    assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol)
 
     # compare singular values
-    # numpy.testing.assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol)
+    # assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol)
 
     # change sign of vectors
     for i in range(min(shape[0], shape[1])):
@@ -323,5 +317,5 @@ def test_svd(type, shape):
 
     # compare vectors for non-zero values
     for i in range(numpy.count_nonzero(np_s > tol)):
-        numpy.testing.assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol)
-        numpy.testing.assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol)
+        assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol)
+        assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol)
diff --git a/tests/test_logic.py b/tests/test_logic.py
index b3280be07618..425106fd2efe 100644
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@@ -1,13 +1,16 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 
 import numpy
+from numpy.testing import (
+    assert_allclose,
+    assert_equal
+)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(0,), (4,), (2, 3), (2, 2, 2)],
                          ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)'])
@@ -31,16 +34,14 @@ def test_all(type, shape):
 
         np_res = numpy.all(a)
         dpnp_res = dpnp.all(ia)
-        numpy.testing.assert_allclose(dpnp_res, np_res)
+        assert_allclose(dpnp_res, np_res)
 
         np_res = a.all()
         dpnp_res = ia.all()
-        numpy.testing.assert_allclose(dpnp_res, np_res)
+        assert_allclose(dpnp_res, np_res)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 def test_allclose(type):
 
     a = numpy.random.rand(10)
@@ -51,7 +52,7 @@ def test_allclose(type):
 
     np_res = numpy.allclose(a, b)
     dpnp_res = dpnp.allclose(dpnp_a, dpnp_b)
-    numpy.testing.assert_allclose(dpnp_res, np_res)
+    assert_allclose(dpnp_res, np_res)
 
     a[0] = numpy.inf
 
@@ -59,12 +60,10 @@ def test_allclose(type):
 
     np_res = numpy.allclose(a, b)
     dpnp_res = dpnp.allclose(dpnp_a, dpnp_b)
-    numpy.testing.assert_allclose(dpnp_res, np_res)
+    assert_allclose(dpnp_res, np_res)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_],
-                         ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(0,), (4,), (2, 3), (2, 2, 2)],
                          ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)'])
@@ -88,58 +87,156 @@ def test_any(type, shape):
 
         np_res = numpy.any(a)
         dpnp_res = dpnp.any(ia)
-        numpy.testing.assert_allclose(dpnp_res, np_res)
+        assert_allclose(dpnp_res, np_res)
 
         np_res = a.any()
         dpnp_res = ia.any()
-        numpy.testing.assert_allclose(dpnp_res, np_res)
+        assert_allclose(dpnp_res, np_res)
+
+
+def test_equal():
+    a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
+    ia = dpnp.array(a)
+    for i in range(len(a)):
+        np_res = (a == i)
+        dpnp_res = (ia == i)
+        assert_equal(dpnp_res, np_res)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_greater():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a) + 1):
         np_res = (a > i)
         dpnp_res = (ia > i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_greater_equal():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a) + 1):
         np_res = (a >= i)
         dpnp_res = (ia >= i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_less():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a) + 1):
         np_res = (a < i)
         dpnp_res = (ia < i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_less_equal():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a) + 1):
         np_res = (a <= i)
         dpnp_res = (ia <= i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_not_equal():
     a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8])
     ia = dpnp.array(a)
     for i in range(len(a)):
         np_res = (a != i)
         dpnp_res = (ia != i)
-        numpy.testing.assert_equal(dpnp_res, np_res)
+        assert_equal(dpnp_res, np_res)
+
+
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True))
+@pytest.mark.parametrize("op",
+                        ['logical_and', 'logical_or', 'logical_xor'],
+                        ids=['logical_and', 'logical_or', 'logical_xor'])
+def test_logic_comparison(op, dtype):
+    a = numpy.array([0, 0, 3, 2], dtype=dtype)
+    b = numpy.array([0, 4, 0, 2], dtype=dtype)
+
+    # x1 OP x2
+    np_res = getattr(numpy, op)(a, b)
+    dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b))
+    assert_equal(dpnp_res, np_res)
+
+    # x2 OP x1
+    np_res = getattr(numpy, op)(b, a)
+    dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a))
+    assert_equal(dpnp_res, np_res)
+
+    # numpy.tile(x1, (10,)) OP numpy.tile(x2, (10,))
+    a, b = numpy.tile(a, (10,)), numpy.tile(b, (10,))
+    np_res = getattr(numpy, op)(a, b)
+    dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b))
+    assert_equal(dpnp_res, np_res)
+
+    # numpy.tile(x2, (10, 1)) OP numpy.tile(x1, (10, 1)), applied to the already-tiled operands
+    a, b = numpy.tile(a, (10, 1)), numpy.tile(b, (10, 1))
+    np_res = getattr(numpy, op)(b, a)
+    dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a))
+    assert_equal(dpnp_res, np_res)
+
+
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True))
+def test_logical_not(dtype):
+    a = dpnp.array([0, 4, 0, 2], dtype=dtype)
+
+    np_res = numpy.logical_not(a.asnumpy())
+    dpnp_res = dpnp.logical_not(a)
+    assert_equal(dpnp_res, np_res)
+
+
+@pytest.mark.parametrize("op",
+                         ['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                          'logical_and', 'logical_or', 'logical_xor', 'not_equal'],
+                         ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                              'logical_and', 'logical_or', 'logical_xor', 'not_equal'])
+@pytest.mark.parametrize("x1",
+                         [[3, 4, 5, 6], [[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]],
+                         ids=['[3, 4, 5, 6]', '[[1, 2, 3, 4], [5, 6, 7, 8]]', '[[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]'])
+@pytest.mark.parametrize("x2",
+                         [5, [1, 2, 5, 6]],
+                         ids=['5', '[1, 2, 5, 6]'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True))
+def test_elemwise_comparison(op, x1, x2, dtype):
+    create_func = lambda xp, a: xp.asarray(a, dtype=dtype) if not numpy.isscalar(a) else numpy.dtype(dtype=dtype).type(a)
+
+    np_x1, np_x2 = create_func(numpy, x1), create_func(numpy, x2)
+    dp_x1, dp_x2 = create_func(dpnp, np_x1), create_func(dpnp, np_x2)
+
+    # x1 OP x2
+    np_res = getattr(numpy, op)(np_x1, np_x2)
+    dpnp_res = getattr(dpnp, op)(dp_x1, dp_x2)
+    assert_equal(dpnp_res, np_res)
+
+    # x2 OP x1
+    np_res = getattr(numpy, op)(np_x2, np_x1)
+    dpnp_res = getattr(dpnp, op)(dp_x2, dp_x1)
+    assert_equal(dpnp_res, np_res)
+
+    # x1[::-1] OP x2
+    np_res = getattr(numpy, op)(np_x1[::-1], np_x2)
+    dpnp_res = getattr(dpnp, op)(dp_x1[::-1], dp_x2)
+    assert_equal(dpnp_res, np_res)
+
+
+@pytest.mark.parametrize("op",
+                         ['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                          'logical_and', 'logical_or', 'logical_xor', 'not_equal'],
+                         ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                              'logical_and', 'logical_or', 'logical_xor', 'not_equal'])
+@pytest.mark.parametrize("sh1",
+                         [[10], [8, 4], [4, 1, 2]],
+                         ids=['(10,)', '(8, 4)', '(4, 1, 2)'])
+@pytest.mark.parametrize("sh2",
+                         [[12], [4, 8], [1, 8, 6]],
+                         ids=['(12,)', '(4, 8)', '(1, 8, 6)'])
+def test_comparison_no_broadcast_with_shapes(op, sh1, sh2):
+    """Non-broadcastable shapes must make `op` raise ValueError in both dpnp and numpy."""
+    x1, x2 = dpnp.random.randn(*sh1), dpnp.random.randn(*sh2)
+    # x1 OP x2: one `raises` block per library, because a shared block exits at the first exception
+    for xp, args in ((dpnp, (x1, x2)), (numpy, (x1.asnumpy(), x2.asnumpy()))):
+        with pytest.raises(ValueError):
+            getattr(xp, op)(*args)
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 21071bec41e9..78f628908337 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -1,8 +1,15 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 
 import numpy
+from numpy.testing import (
+    assert_allclose,
+    assert_array_almost_equal,
+    assert_array_equal,
+    assert_raises
+)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -10,27 +17,27 @@ class TestConvolve:
     def test_object(self):
         d = [1.] * 100
         k = [1.] * 3
-        numpy.testing.assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3))
+        assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3))
 
     def test_no_overwrite(self):
         d = dpnp.ones(100)
         k = dpnp.ones(3)
         dpnp.convolve(d, k)
-        numpy.testing.assert_array_equal(d, dpnp.ones(100))
-        numpy.testing.assert_array_equal(k, dpnp.ones(3))
+        assert_array_equal(d, dpnp.ones(100))
+        assert_array_equal(k, dpnp.ones(3))
 
     def test_mode(self):
         d = dpnp.ones(100)
         k = dpnp.ones(3)
         default_mode = dpnp.convolve(d, k, mode='full')
         full_mode = dpnp.convolve(d, k, mode='f')
-        numpy.testing.assert_array_equal(full_mode, default_mode)
+        assert_array_equal(full_mode, default_mode)
         # integer mode
-        with numpy.testing.assert_raises(ValueError):
+        with assert_raises(ValueError):
             dpnp.convolve(d, k, mode=-1)
-        numpy.testing.assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode)
+        assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode)
         # illegal arguments
-        with numpy.testing.assert_raises(TypeError):
+        with assert_raises(TypeError):
             dpnp.convolve(d, k, mode=None)
 
 
@@ -53,33 +60,39 @@ def test_diff(array):
     dpnp_a = dpnp.array(array)
     expected = numpy.diff(np_a)
     result = dpnp.diff(dpnp_a)
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
-@pytest.mark.parametrize("dtype1",
-                         [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, numpy.complex128],
-                         ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128'])
-@pytest.mark.parametrize("dtype2",
-                         [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, numpy.complex128],
-                         ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128'])
+@pytest.mark.parametrize("dtype1", get_all_dtypes())
+@pytest.mark.parametrize("dtype2", get_all_dtypes())
+@pytest.mark.parametrize("func",
+                         ['add', 'multiply', 'subtract', 'divide'])
 @pytest.mark.parametrize("data",
                          [[[1, 2], [3, 4]]],
                          ids=['[[1, 2], [3, 4]]'])
-def test_multiply_dtype(dtype1, dtype2, data):
+def test_op_multiple_dtypes(dtype1, func, dtype2, data):
     np_a = numpy.array(data, dtype=dtype1)
     dpnp_a = dpnp.array(data, dtype=dtype1)
 
     np_b = numpy.array(data, dtype=dtype2)
     dpnp_b = dpnp.array(data, dtype=dtype2)
 
-    result = dpnp.multiply(dpnp_a, dpnp_b)
-    expected = numpy.multiply(np_a, np_b)
-    numpy.testing.assert_array_equal(result, expected)
+    if func == 'subtract' and (dtype1 == dtype2 == dpnp.bool):
+        for xp, x1, x2 in ((dpnp, dpnp_a, dpnp_b), (numpy, np_a, np_b)):  # own `raises` block per library
+            with pytest.raises(TypeError):  # a shared block would stop after the first exception
+                getattr(xp, func)(x1, x2)
+    else:
+        result = getattr(dpnp, func)(dpnp_a, dpnp_b)
+        expected = getattr(numpy, func)(np_a, np_b)
+        assert_array_equal(result, expected)
 
 
 @pytest.mark.parametrize("rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3])
-@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9], 5, 0.5])
-@pytest.mark.parametrize("dtype", [numpy.int32, numpy.int64, numpy.float32, numpy.float64])
+@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9]])
+# TODO: bring every mathematical operation to the same level of dtype support, i.e. use
+# @pytest.mark.parametrize("dtype", get_all_dtypes())
+# for all of them, and get rid of the fallbacks on numpy allowed by the fixture below
+# @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestMathematical:
 
     @staticmethod
@@ -90,74 +103,90 @@ def array_or_scalar(xp, data, dtype=None):
         return xp.array(data, dtype=dtype)
 
     def _test_mathematical(self, name, dtype, lhs, rhs):
-        a = self.array_or_scalar(dpnp, lhs, dtype=dtype)
-        b = self.array_or_scalar(dpnp, rhs, dtype=dtype)
-        result = getattr(dpnp, name)(a, b)
-
-        a = self.array_or_scalar(numpy, lhs, dtype=dtype)
-        b = self.array_or_scalar(numpy, rhs, dtype=dtype)
-        expected = getattr(numpy, name)(a, b)
-
-        numpy.testing.assert_allclose(result, expected, atol=1e-4)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+        a_dpnp = self.array_or_scalar(dpnp, lhs, dtype=dtype)
+        b_dpnp = self.array_or_scalar(dpnp, rhs, dtype=dtype)
+
+        a_np = self.array_or_scalar(numpy, lhs, dtype=dtype)
+        b_np = self.array_or_scalar(numpy, rhs, dtype=dtype)
+
+        if name == 'subtract' and not numpy.isscalar(rhs) and dtype == dpnp.bool:
+            for xp, a, b in ((dpnp, a_dpnp, b_dpnp), (numpy, a_np, b_np)):  # own `raises` block per library
+                with pytest.raises(TypeError):  # a shared block would stop after the first exception
+                    getattr(xp, name)(a, b)
+        else:
+            result = getattr(dpnp, name)(a_dpnp, b_dpnp)
+            expected = getattr(numpy, name)(a_np, b_np)
+            assert_allclose(result, expected, atol=1e-4)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_add(self, dtype, lhs, rhs):
         self._test_mathematical('add', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_arctan2(self, dtype, lhs, rhs):
         self._test_mathematical('arctan2', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_copysign(self, dtype, lhs, rhs):
         self._test_mathematical('copysign', dtype, lhs, rhs)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_divide(self, dtype, lhs, rhs):
         self._test_mathematical('divide', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_fmod(self, dtype, lhs, rhs):
         self._test_mathematical('fmod', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_floor_divide(self, dtype, lhs, rhs):
         self._test_mathematical('floor_divide', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_hypot(self, dtype, lhs, rhs):
         self._test_mathematical('hypot', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_maximum(self, dtype, lhs, rhs):
         self._test_mathematical('maximum', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_minimum(self, dtype, lhs, rhs):
         self._test_mathematical('minimum', dtype, lhs, rhs)
 
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_multiply(self, dtype, lhs, rhs):
         self._test_mathematical('multiply', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_remainder(self, dtype, lhs, rhs):
         self._test_mathematical('remainder', dtype, lhs, rhs)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
     def test_power(self, dtype, lhs, rhs):
         self._test_mathematical('power', dtype, lhs, rhs)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_subtract(self, dtype, lhs, rhs):
         self._test_mathematical('subtract', dtype, lhs, rhs)
 
 
+@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings")
 @pytest.mark.parametrize("val_type",
                          [bool, int, float],
                          ids=['bool', 'int', 'float'])
-@pytest.mark.parametrize("data_type",
-                         [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32'])
+@pytest.mark.parametrize("data_type", get_all_dtypes())
+@pytest.mark.parametrize("func",
+                         ['add', 'multiply', 'subtract', 'divide'])
 @pytest.mark.parametrize("val",
                          [0, 1, 5],
                          ids=['0', '1', '5'])
@@ -172,33 +201,78 @@ def test_subtract(self, dtype, lhs, rhs):
                               '[[1, 2], [3, 4]]',
                               '[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]',
                               '[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]'])
-def test_multiply_scalar(array, val, data_type, val_type):
+def test_op_with_scalar(array, val, func, data_type, val_type):
     np_a = numpy.array(array, dtype=data_type)
     dpnp_a = dpnp.array(array, dtype=data_type)
     val_ = val_type(val)
 
-    result = dpnp.multiply(dpnp_a, val_)
-    expected = numpy.multiply(np_a, val_)
-    numpy.testing.assert_array_equal(result, expected)
+    if func == 'subtract' and val_type == bool and data_type == dpnp.bool:
+        with pytest.raises(TypeError):
+            result = getattr(dpnp, func)(dpnp_a, val_)
+            expected = getattr(numpy, func)(np_a, val_)
+
+            result = getattr(dpnp, func)(val_, dpnp_a)
+            expected = getattr(numpy, func)(val_, np_a)
+    else:
+        result = getattr(dpnp, func)(dpnp_a, val_)
+        expected = getattr(numpy, func)(np_a, val_)
+        assert_allclose(result, expected)
+
+        result = getattr(dpnp, func)(val_, dpnp_a)
+        expected = getattr(numpy, func)(val_, np_a)
+        assert_allclose(result, expected)
+
+
+@pytest.mark.parametrize("shape",
+                         [(), (3, 2)],
+                         ids=['()', '(3, 2)'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_multiply_scalar(shape, dtype):
+    np_a = numpy.ones(shape, dtype=dtype)
+    dpnp_a = dpnp.ones(shape, dtype=dtype)
+
+    result = 0.5 * dpnp_a * 1.7
+    expected = 0.5 * np_a * 1.7
+    assert_allclose(result, expected)
+
 
-    result = dpnp.multiply(val_, dpnp_a)
-    expected = numpy.multiply(val_, np_a)
-    numpy.testing.assert_array_equal(result, expected)
+@pytest.mark.parametrize("shape",
+                         [(), (3, 2)],
+                         ids=['()', '(3, 2)'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_add_scalar(shape, dtype):
+    np_a = numpy.ones(shape, dtype=dtype)
+    dpnp_a = dpnp.ones(shape, dtype=dtype)
+
+    result = 0.5 + dpnp_a + 1.7
+    expected = 0.5 + np_a + 1.7
+    assert_allclose(result, expected)
 
 
 @pytest.mark.parametrize("shape",
                          [(), (3, 2)],
                          ids=['()', '(3, 2)'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float32, numpy.float64],
-                         ids=['numpy.float32', 'numpy.float64'])
-def test_multiply_scalar2(shape, dtype):
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_subtract_scalar(shape, dtype):
     np_a = numpy.ones(shape, dtype=dtype)
     dpnp_a = dpnp.ones(shape, dtype=dtype)
 
-    result = 0.5 * dpnp_a
-    expected = 0.5 * np_a
-    numpy.testing.assert_array_equal(result, expected)
+    result = 0.5 - dpnp_a - 1.7
+    expected = 0.5 - np_a - 1.7
+    assert_allclose(result, expected)
+
+
+@pytest.mark.parametrize("shape",
+                         [(), (3, 2)],
+                         ids=['()', '(3, 2)'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_divide_scalar(shape, dtype):
+    np_a = numpy.ones(shape, dtype=dtype)
+    dpnp_a = dpnp.ones(shape, dtype=dtype)
+
+    result = 0.5 / dpnp_a / 1.7
+    expected = 0.5 / np_a / 1.7
+    assert_allclose(result, expected)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -211,7 +285,7 @@ def test_nancumprod(array):
 
     result = dpnp.nancumprod(dpnp_a)
     expected = numpy.nancumprod(np_a)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -224,31 +298,25 @@ def test_nancumsum(array):
 
     result = dpnp.nancumsum(dpnp_a)
     expected = numpy.nancumsum(np_a)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 @pytest.mark.parametrize("data",
                          [[[1., -1.], [0.1, -0.1]], [-2, -1, 0, 1, 2]],
                          ids=['[[1., -1.], [0.1, -0.1]]', '[-2, -1, 0, 1, 2]'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_negative(data, dtype):
     np_a = numpy.array(data, dtype=dtype)
     dpnp_a = dpnp.array(data, dtype=dtype)
 
     result = dpnp.negative(dpnp_a)
     expected = numpy.negative(np_a)
-    numpy.testing.assert_array_equal(result, expected)
+    assert_array_equal(result, expected)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("val_type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32'])
-@pytest.mark.parametrize("data_type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32'])
+@pytest.mark.parametrize("val_type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+@pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("val",
                          [0, 1, 5],
                          ids=['0', '1', '5'])
@@ -269,12 +337,11 @@ def test_power(array, val, data_type, val_type):
     val_ = val_type(val)
     result = dpnp.power(dpnp_a, val_)
     expected = numpy.power(np_a, val_)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
 class TestEdiff1d:
-    @pytest.mark.parametrize("data_type",
-                             [numpy.float64, numpy.float32, numpy.int64, numpy.int32])
+    @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True))
     @pytest.mark.parametrize("array", [[1, 2, 4, 7, 0],
                                        [],
                                        [1],
@@ -285,7 +352,7 @@ def test_ediff1d_int(self, array, data_type):
 
         result = dpnp.ediff1d(dpnp_a)
         expected = numpy.ediff1d(np_a)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -297,13 +364,12 @@ def test_ediff1d_args(self):
 
         result = dpnp.ediff1d(np_a, to_end=to_end, to_begin=to_begin)
         expected = numpy.ediff1d(np_a, to_end=to_end, to_begin=to_begin)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestTrapz:
-    @pytest.mark.parametrize("data_type",
-                             [numpy.float64, numpy.float32, numpy.int64, numpy.int32])
+    @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True))
     @pytest.mark.parametrize("array", [[1, 2, 3],
                                        [[1, 2, 3], [4, 5, 6]],
                                        [1, 4, 6, 9, 10, 12],
@@ -315,12 +381,10 @@ def test_trapz_default(self, array, data_type):
 
         result = dpnp.trapz(dpnp_a)
         expected = numpy.trapz(np_a)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
-    @pytest.mark.parametrize("data_type_y",
-                             [numpy.float64, numpy.float32, numpy.int64, numpy.int32])
-    @pytest.mark.parametrize("data_type_x",
-                             [numpy.float64, numpy.float32, numpy.int64, numpy.int32])
+    @pytest.mark.parametrize("data_type_y", get_all_dtypes(no_bool=True, no_complex=True))
+    @pytest.mark.parametrize("data_type_x", get_all_dtypes(no_bool=True, no_complex=True))
     @pytest.mark.parametrize("y_array", [[1, 2, 4, 5],
                                          [1., 2.5, 6., 7.]])
     @pytest.mark.parametrize("x_array", [[2, 5, 6, 9]])
@@ -333,7 +397,7 @@ def test_trapz_with_x_params(self, y_array, x_array, data_type_y, data_type_x):
 
         result = dpnp.trapz(dpnp_y, dpnp_x)
         expected = numpy.trapz(np_y, np_x)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("array", [[1, 2, 3], [4, 5, 6]])
     def test_trapz_with_x_param_2ndim(self, array):
@@ -342,7 +406,7 @@ def test_trapz_with_x_param_2ndim(self, array):
 
         result = dpnp.trapz(dpnp_a, dpnp_a)
         expected = numpy.trapz(np_a, np_a)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("y_array", [[1, 2, 4, 5],
                                          [1., 2.5, 6., 7., ]])
@@ -353,7 +417,7 @@ def test_trapz_with_dx_params(self, y_array, dx):
 
         result = dpnp.trapz(dpnp_y, dx=dx)
         expected = numpy.trapz(np_y, dx=dx)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -388,10 +452,9 @@ def test_cross_3x3(self, x1, x2, axisa, axisb, axisc, axis):
 
         result = dpnp.cross(dpnp_x1, dpnp_x2, axisa, axisb, axisc, axis)
         expected = numpy.cross(np_x1, np_x2, axisa, axisb, axisc, axis)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestGradient:
 
     @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9],
@@ -403,8 +466,9 @@ def test_gradient_y1(self, array):
 
         result = dpnp.gradient(dpnp_y)
         expected = numpy.gradient(np_y)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9],
                                        [3., 4., 7.5, 9.],
                                        [2, 6, 8, 10]])
@@ -415,7 +479,7 @@ def test_gradient_y1_dx(self, array, dx):
 
         result = dpnp.gradient(dpnp_y, dx)
         expected = numpy.gradient(np_y, dx)
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
 
 class TestCeil:
@@ -433,7 +497,7 @@ def test_ceil(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.ceil(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -473,7 +537,7 @@ def test_floor(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.floor(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -513,7 +577,7 @@ def test_trunc(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.trunc(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -556,7 +620,7 @@ def test_power(self):
         np_array2 = numpy.array(array2_data, dtype=numpy.float64)
         expected = numpy.power(np_array1, np_array2, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
diff --git a/tests/test_random.py b/tests/test_random.py
index 54cb2fa3a4d7..bc3501f4d20b 100644
--- a/tests/test_random.py
+++ b/tests/test_random.py
@@ -75,7 +75,6 @@ def test_input_shape(func):
     assert shape == res.shape
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("func",
                          [dpnp.random.random,
                           dpnp.random.random_sample,
diff --git a/tests/test_random_state.py b/tests/test_random_state.py
index b93f52411c5d..0d1752c744ee 100644
--- a/tests/test_random_state.py
+++ b/tests/test_random_state.py
@@ -34,8 +34,8 @@ def get_default_floating():
 
 class TestNormal:
     @pytest.mark.parametrize("dtype",
-                             [dpnp.float32, dpnp.float64, None],
-                             ids=['float32', 'float64', 'None'])
+                             [dpnp.float32, dpnp.float64, dpnp.float, None],
+                             ids=['float32', 'float64', 'float', 'None'])
     @pytest.mark.parametrize("usm_type",
                              ["host", "device", "shared"],
                              ids=['host', 'device', 'shared'])
@@ -173,9 +173,9 @@ def test_fallback(self, loc, scale):
 
 
     @pytest.mark.parametrize("dtype",
-                             [dpnp.float16, dpnp.float, float, dpnp.integer, dpnp.int64, dpnp.int32, dpnp.int, int,
+                             [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int32, dpnp.int, int,
                               dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_],
-                             ids=['dpnp.float16', 'dpnp.float', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int32', 'dpnp.int', 'int',
+                             ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int32', 'dpnp.int', 'int',
                                   'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_'])
     def test_invalid_dtype(self, dtype):
         # dtype must be float32 or float64
@@ -257,8 +257,8 @@ def test_wrong_dims(self):
 
 class TestRandInt:
     @pytest.mark.parametrize("dtype",
-                             [int, dpnp.int32, dpnp.int],
-                             ids=['int', 'dpnp.int32', 'dpnp.int'])
+                             [int, dpnp.int32, dpnp.int, dpnp.integer],
+                             ids=['int', 'dpnp.int32', 'dpnp.int', 'dpnp.integer'])
     @pytest.mark.parametrize("usm_type",
                              ["host", "device", "shared"],
                              ids=['host', 'device', 'shared'])
@@ -267,6 +267,9 @@ def test_distr(self, dtype, usm_type):
         low = 1
         high = 10
 
+        if dtype in (dpnp.int, dpnp.integer) and dtype != dpnp.dtype('int32'):
+            pytest.skip("dtype isn't alias on dpnp.int32 on the target OS, so there will be a fallback")
+
         sycl_queue = dpctl.SyclQueue()
         data = RandomState(seed, sycl_queue=sycl_queue).randint(low=low,
                                                                 high=high,
@@ -319,7 +322,6 @@ def test_negative_bounds(self):
         assert_array_equal(actual, desired)
 
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_negative_interval(self):
         rs = RandomState(3567)
 
@@ -421,16 +423,16 @@ def test_bounds_fallback(self, low, high):
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @pytest.mark.parametrize("dtype",
-                             [dpnp.int64, dpnp.integer, dpnp.bool, dpnp.bool_, bool],
-                             ids=['dpnp.int64', 'dpnp.integer', 'dpnp.bool', 'dpnp.bool_', 'bool'])
+                             [dpnp.int64, dpnp.int, dpnp.integer, dpnp.bool, dpnp.bool_, bool],
+                             ids=['dpnp.int64', 'dpnp.int', 'dpnp.integer', 'dpnp.bool', 'dpnp.bool_', 'bool'])
     def test_dtype_fallback(self, dtype):
         seed = 157
         low = -3 if not dtype in {dpnp.bool_, bool} else 0
         high = 37 if not dtype in {dpnp.bool_, bool} else 2
         size = (3, 2, 5)
 
-        if dtype == dpnp.integer and dtype == dpnp.dtype('int32'):
-            pytest.skip("dpnp.integer is alias on dpnp.int32 on the target OS, so no fallback here")
+        if dtype in (dpnp.int, dpnp.integer) and dtype == dpnp.dtype('int32'):
+            pytest.skip("dtype is alias on dpnp.int32 on the target OS, so no fallback here")
 
         # dtype must be int or dpnp.int32, in other cases it will be a fallback to numpy
         actual = RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype).asnumpy()
@@ -714,8 +716,8 @@ class TestUniform:
                              [[1.23, 10.54], [10.54, 1.23]],
                              ids=['(low, high)=[1.23, 10.54]', '(low, high)=[10.54, 1.23]'])
     @pytest.mark.parametrize("dtype",
-                             [dpnp.float32, dpnp.float64, dpnp.int32, None],
-                             ids=['float32', 'float64', 'int32', 'None'])
+                             [dpnp.float32, dpnp.float64, dpnp.float, dpnp.int32, None],
+                             ids=['float32', 'float64', 'float', 'int32', 'None'])
     @pytest.mark.parametrize("usm_type",
                              ["host", "device", "shared"],
                              ids=['host', 'device', 'shared'])
@@ -831,12 +833,15 @@ def test_fallback(self, low, high):
 
 
     @pytest.mark.parametrize("dtype",
-                             [dpnp.float16, dpnp.float, float, dpnp.integer, dpnp.int64, dpnp.int, int,
+                             [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int, int,
                               dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_],
-                             ids=['dpnp.float16', 'dpnp.float', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int', 'int',
+                             ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int', 'int',
                                   'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_'])
     def test_invalid_dtype(self, dtype):
-        # dtype must be float32 or float64
+        if dtype in (dpnp.int, dpnp.integer) and dtype == dpnp.dtype('int32'):
+            pytest.skip("dtype is alias on dpnp.int32 on the target OS, so no error here")
+
+        # dtype must be int32, float32 or float64
         assert_raises(TypeError, RandomState().uniform, dtype=dtype)
 
 
diff --git a/tests/test_strides.py b/tests/test_strides.py
index 7ec1d6b3f03f..02e8c8689757 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -1,8 +1,13 @@
 import math
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
+
 import numpy
+from numpy.testing import (
+    assert_allclose
+)
 
 
 def _getattr(ex, str_):
@@ -15,12 +20,10 @@ def _getattr(ex, str_):
 
 @pytest.mark.parametrize("func_name",
                          ['abs', ])
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
-def test_strides(func_name, type):
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strides(func_name, dtype):
     shape = (4, 4)
-    a = numpy.arange(shape[0] * shape[1], dtype=type).reshape(shape)
+    a = numpy.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
     a_strides = a[0::2, 0::2]
     dpa = dpnp.array(a)
     dpa_strides = dpa[0::2, 0::2]
@@ -31,17 +34,16 @@ def test_strides(func_name, type):
     numpy_func = _getattr(numpy, func_name)
     expected = numpy_func(a_strides)
 
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
+@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings")
 @pytest.mark.parametrize("func_name",
                          ["arccos", "arccosh", "arcsin", "arcsinh", "arctan", "arctanh", "cbrt", "ceil", "copy", "cos",
                           "cosh", "conjugate", "degrees", "ediff1d", "exp", "exp2", "expm1", "fabs", "floor", "log",
                           "log10", "log1p", "log2", "negative", "radians", "sign", "sin", "sinh", "sqrt", "square",
                           "tanh", "trunc"])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(10,)],
                          ids=["(10,)"])
@@ -58,12 +60,10 @@ def test_strides_1arg(func_name, dtype, shape):
     numpy_func = _getattr(numpy, func_name)
     expected = numpy_func(b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(10,)],
                          ids=["(10,)"])
@@ -80,12 +80,10 @@ def test_strides_erf(dtype, shape):
     for idx, val in enumerate(b):
         expected[idx] = math.erf(val)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(10,)],
                          ids=["(10,)"])
@@ -101,12 +99,10 @@ def test_strides_reciprocal(dtype, shape):
     result = dpnp.reciprocal(dpb)
     expected = numpy.reciprocal(b)
 
-    numpy.testing.assert_allclose(result, expected, rtol=1e-06)
+    assert_allclose(result, expected, rtol=1e-06)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(10,)],
                          ids=["(10,)"])
@@ -120,14 +116,12 @@ def test_strides_tan(dtype, shape):
     result = dpnp.tan(dpb)
     expected = numpy.tan(b)
 
-    numpy.testing.assert_allclose(result, expected, rtol=1e-06)
+    assert_allclose(result, expected, rtol=1e-06)
 
 
 @pytest.mark.parametrize("func_name",
                          ["add", "arctan2", "hypot", "maximum", "minimum", "multiply", "power", "subtract"])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(3, 3)],
                          ids=["(3, 3)"])
@@ -144,7 +138,7 @@ def test_strides_2args(func_name, dtype, shape):
     numpy_func = _getattr(numpy, func_name)
     expected = numpy_func(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
 @pytest.mark.parametrize("func_name",
@@ -168,12 +162,10 @@ def test_strides_bitwise(func_name, dtype, shape):
     numpy_func = _getattr(numpy, func_name)
     expected = numpy_func(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(3, 3)],
                          ids=["(3, 3)"])
@@ -187,13 +179,10 @@ def test_strides_copysign(dtype, shape):
     result = dpnp.copysign(dpa, dpb)
     expected = numpy.copysign(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(3, 3)],
                          ids=["(3, 3)"])
@@ -207,13 +196,10 @@ def test_strides_fmod(dtype, shape):
     result = dpnp.fmod(dpa, dpb)
     expected = numpy.fmod(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 @pytest.mark.parametrize("shape",
                          [(3, 3)],
                          ids=["(3, 3)"])
@@ -227,4 +213,4 @@ def test_strides_true_devide(dtype, shape):
     result = dpnp.fmod(dpa, dpb)
     expected = numpy.fmod(a, b)
 
-    numpy.testing.assert_allclose(result, expected)
+    assert_allclose(result, expected)
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 26a71eef2936..1bffa18111b8 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -1,9 +1,15 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 import dpctl
 import numpy
 
+from numpy.testing import (
+    assert_array_equal,
+    assert_raises
+)
+
 
 list_of_backend_str = [
     "host",
@@ -17,7 +23,7 @@
     "cpu",
 ]
 
-available_devices = [d for d in dpctl.get_devices() if not d.has_aspect_host]
+available_devices = [d for d in dpctl.get_devices() if not getattr(d, 'has_aspect_host', False)]
 
 valid_devices = []
 for device in available_devices:
@@ -76,27 +82,30 @@ def vvsort(val, vec, size, xp):
     "func, arg, kwargs",
     [
         pytest.param("arange",
-                     -25.7,
+                     [-25.7],
                      {'stop': 10**8, 'step': 15}),
         pytest.param("full",
-                     (2,2),
+                     [(2,2)],
                      {'fill_value': 5}),
+        pytest.param("eye",
+                     [4, 2],
+                     {}),
         pytest.param("ones",
-                     (2,2),
+                     [(2,2)],
                      {}),
         pytest.param("zeros",
-                     (2,2),
+                     [(2,2)],
                      {})
     ])
 @pytest.mark.parametrize("device",
                           valid_devices,
                           ids=[device.filter_string for device in valid_devices])
 def test_array_creation(func, arg, kwargs, device):
-    numpy_array = getattr(numpy, func)(arg, **kwargs)
+    numpy_array = getattr(numpy, func)(*arg, **kwargs)
 
     dpnp_kwargs = dict(kwargs)
     dpnp_kwargs['device'] = device
-    dpnp_array = getattr(dpnp, func)(arg, **dpnp_kwargs)
+    dpnp_array = getattr(dpnp, func)(*arg, **dpnp_kwargs)
 
     numpy.testing.assert_array_equal(numpy_array, dpnp_array)
     assert dpnp_array.sycl_device == device
@@ -152,12 +161,22 @@ def test_array_creation_like(func, kwargs, device_x, device_y):
 
     dpnp_kwargs = dict(kwargs)
     dpnp_kwargs['device'] = device_y
-    
+
     y = getattr(dpnp, func)(x, **dpnp_kwargs)
     numpy.testing.assert_array_equal(y_orig, y)
     assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
 
 
+@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"])
+@pytest.mark.parametrize("device",
+                          valid_devices,
+                          ids=[device.filter_string for device in valid_devices])
+def test_tril_triu(func, device):  # dpnp.tril / dpnp.triu output is compared against the input's SYCL queue
+    x0 = dpnp.ones((3,3), device=device)  # input is created on the parametrized device
+    x = getattr(dpnp, func)(x0)  # resolve the function by name so one body covers both cases
+    assert_sycl_queue_equal(x.sycl_queue, x0.sycl_queue)
+
+
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize(
     "func,data",
@@ -335,36 +354,114 @@ def test_broadcasting(func, data1, data2, device):
     assert_sycl_queue_equal(result_queue, expected_queue)
 
 
+@pytest.mark.parametrize("func", ["add", "copysign", "divide", "floor_divide", "fmod",
+                                  "maximum", "minimum", "multiply", "outer", "power",
+                                  "remainder", "subtract"])
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_2in_1out_diff_queue_but_equal_context(func, device):  # two-operand functions must raise when operands sit on different queues
+    x1 = dpnp.arange(10)  # no device/queue passed, so dpnp picks its default
+    x2 = dpnp.arange(10, sycl_queue=dpctl.SyclQueue(device))[::-1]  # newly constructed queue; per the test name it presumably shares x1's context - TODO confirm
+    with assert_raises(ValueError):
+        getattr(dpnp, func)(x1, x2)
+
+
+@pytest.mark.parametrize(
+    "func, kwargs",
+    [
+        pytest.param("normal",
+                     {'loc': 1.0, 'scale': 3.4, 'size': (5, 12)}),
+        pytest.param("rand",
+                     {'d0': 20}),
+        pytest.param("randint",
+                     {'low': 2, 'high': 15, 'size': (4, 8, 16), 'dtype': dpnp.int32}),
+        pytest.param("randn",
+                     {'d0': 20}),
+        pytest.param("random",
+                     {'size': (35, 45)}),
+        pytest.param("random_integers",
+                     {'low': -17, 'high': 3, 'size': (12, 16)}),
+        pytest.param("random_sample",
+                     {'size': (7, 7)}),
+        pytest.param("ranf",
+                     {'size': (10, 7, 12)}),
+        pytest.param("sample",
+                     {'size': (7, 9)}),
+        pytest.param("standard_normal",
+                     {'size': (4, 4, 8)}),
+        pytest.param("uniform",
+                     {'low': 1.0, 'high': 2.0, 'size': (4, 2, 5)})
+    ])
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
 @pytest.mark.parametrize("usm_type",
                          ["host", "device", "shared"])
-@pytest.mark.parametrize("size",
-                         [None, (), 3, (2, 1), (4, 2, 5)],
-                         ids=['None', '()', '3', '(2,1)', '(4,2,5)'])
-def test_uniform(usm_type, size):
-    low = 1.0
-    high = 2.0
-    res = dpnp.random.uniform(low, high, size=size, usm_type=usm_type)
+def test_random(func, kwargs, device, usm_type):
+    kwargs = {**kwargs, 'device': device, 'usm_type': usm_type}
+
+    # test with default SYCL queue per a device
+    res_array = getattr(dpnp.random, func)(**kwargs)
+    assert device == res_array.sycl_device
+    assert usm_type == res_array.usm_type
 
-    assert usm_type == res.usm_type
+    sycl_queue = dpctl.SyclQueue(device, property="in_order")
+    kwargs['device'] = None
+    kwargs['sycl_queue'] = sycl_queue
 
+    # test with in-order SYCL queue per a device and passed as argument
+    res_array = getattr(dpnp.random, func)(**kwargs)
+    assert usm_type == res_array.usm_type
+    assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue)
 
+
+@pytest.mark.parametrize(
+    "func, args, kwargs",
+    [
+        pytest.param("normal",
+                     [],
+                     {'loc': 1.0, 'scale': 3.4, 'size': (5, 12)}),
+        pytest.param("rand",
+                     [15, 30, 5],
+                     {}),
+        pytest.param("randint",
+                     [],
+                     {'low': 2, 'high': 15, 'size': (4, 8, 16), 'dtype': dpnp.int32}),
+        pytest.param("randn",
+                     [20, 5, 40],
+                     {}),
+        pytest.param("random_sample",
+                     [],
+                     {'size': (7, 7)}),
+        pytest.param("standard_normal",
+                     [],
+                     {'size': (4, 4, 8)}),
+        pytest.param("uniform",
+                     [],
+                     {'low': 1.0, 'high': 2.0, 'size': (4, 2, 5)})
+    ])
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
 @pytest.mark.parametrize("usm_type",
                          ["host", "device", "shared"])
-@pytest.mark.parametrize("seed",
-                         [None, (), 123, (12, 58), (147, 56, 896), [1, 654, 78]],
-                         ids=['None', '()', '123', '(12,58)', '(147,56,896)', '[1,654,78]'])
-def test_rs_uniform(usm_type, seed):
-    seed = 123
-    sycl_queue = dpctl.SyclQueue()
-    low = 1.0
-    high = 2.0
-    rs = dpnp.random.RandomState(seed, sycl_queue=sycl_queue)
-    res = rs.uniform(low, high, usm_type=usm_type)
+def test_random_state(func, args, kwargs, device, usm_type):
+    kwargs = {**kwargs, 'usm_type': usm_type}
 
-    assert usm_type == res.usm_type
+    # test with default SYCL queue per a device
+    rs = dpnp.random.RandomState(seed=1234567, device=device)
+    res_array = getattr(rs, func)(*args, **kwargs)
+    assert device == res_array.sycl_device
+    assert usm_type == res_array.usm_type
 
-    res_sycl_queue = res.get_array().sycl_queue
-    assert_sycl_queue_equal(res_sycl_queue, sycl_queue)
+    sycl_queue = dpctl.SyclQueue(device, property="in_order")
+
+    # test with in-order SYCL queue per a device and passed as argument
+    rs = dpnp.random.RandomState((147, 56, 896), sycl_queue=sycl_queue)
+    res_array = getattr(rs, func)(*args, **kwargs)
+    assert usm_type == res_array.usm_type
+    assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -569,7 +666,7 @@ def test_eig(device):
     dpnp_val_queue = dpnp_val.get_array().sycl_queue
     dpnp_vec_queue = dpnp_vec.get_array().sycl_queue
 
-    # compare queue and device    
+    # compare queue and device
     assert_sycl_queue_equal(dpnp_val_queue, expected_queue)
     assert_sycl_queue_equal(dpnp_vec_queue, expected_queue)
 
@@ -655,7 +752,6 @@ def test_qr(device):
     assert_sycl_queue_equal(dpnp_r_queue, expected_queue)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("device",
                         valid_devices,
                         ids=[device.filter_string for device in valid_devices])
@@ -663,7 +759,7 @@ def test_svd(device):
     tol = 1e-12
     shape = (2,2)
     numpy_data = numpy.arange(shape[0] * shape[1]).reshape(shape)
-    dpnp_data = dpnp.arange(shape[0] * shape[1]).reshape(shape)
+    dpnp_data = dpnp.arange(shape[0] * shape[1], device=device).reshape(shape)
     np_u, np_s, np_vt = numpy.linalg.svd(numpy_data)
     dpnp_u, dpnp_s, dpnp_vt = dpnp.linalg.svd(dpnp_data)
 
@@ -675,7 +771,7 @@ def test_svd(device):
     assert (dpnp_vt.shape == np_vt.shape)
 
     # check decomposition
-    dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype)
+    dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype, device=device)
     for i in range(dpnp_s.size):
         dpnp_diag_s[i, i] = dpnp_s[i]
 
@@ -739,3 +835,39 @@ def test_array_copy(device, func, device_param, queue_param):
     result = dpnp.array(dpnp_data, **kwargs)
 
     assert_sycl_queue_equal(result.sycl_queue, dpnp_data.sycl_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+# TODO: drop no_bool=True once a dlpack version newer than 0.7 is used
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True))
+@pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)])  # 0-d, 1-d, zero-size and 3-d shapes
+def test_from_dlpack(arr_dtype, shape, device):  # import a dpnp array through dpnp.from_dlpack and compare it with its source
+    X = dpnp.empty(shape=shape, dtype=arr_dtype, device=device)
+    Y = dpnp.from_dlpack(X)
+    assert_array_equal(X, Y)
+    assert X.__dlpack_device__() == Y.__dlpack_device__()
+    assert X.sycl_device == Y.sycl_device
+    assert X.sycl_context == Y.sycl_context
+    assert X.usm_type == Y.usm_type
+    if Y.ndim:  # the reversed-view check needs at least one axis, so 0-d arrays skip it
+        V = Y[::-1]
+        W = dpnp.from_dlpack(V)
+        assert V.strides == W.strides  # strides of the reversed view must be unchanged after import
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+# TODO: drop no_bool=True once a dlpack version newer than 0.7 is used
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True))
+def test_from_dlpack_with_dpt(arr_dtype, device):  # import a dpctl.tensor array through dpnp.from_dlpack
+    X = dpctl.tensor.empty((64,), dtype=arr_dtype, device=device)  # source array comes from dpctl.tensor, not dpnp
+    Y = dpnp.from_dlpack(X)
+    assert_array_equal(X, Y)
+    assert isinstance(Y, dpnp.dpnp_array.dpnp_array)  # the imported object must be a dpnp array
+    assert X.__dlpack_device__() == Y.__dlpack_device__()
+    assert X.sycl_device == Y.sycl_device
+    assert X.sycl_context == Y.sycl_context
+    assert X.usm_type == Y.usm_type
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 094fe419c263..1a33a1d655dd 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -11,16 +11,17 @@
 ]
 
 
-@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
-def test_coerced_usm_types_sum(usm_type):
-    x = dp.arange(10, usm_type = "device")
-    y = dp.arange(10, usm_type = usm_type)
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_sum(usm_type_x, usm_type_y):
+    x = dp.arange(1000, usm_type = usm_type_x)
+    y = dp.arange(1000, usm_type = usm_type_y)
 
-    z = x + y
-    
-    assert z.usm_type == x.usm_type
-    assert z.usm_type == "device"
-    assert y.usm_type == usm_type
+    z = 1.3 + x + y + 2
+
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
@@ -29,8 +30,34 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y):
     x = dp.arange(10, usm_type = usm_type_x)
     y = dp.arange(10, usm_type = usm_type_y)
 
-    z = x * y
-    
+    z = 3 * x * y * 1.5
+
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_subtract(usm_type_x, usm_type_y):  # subtraction result must carry the USM type coerced from both array operands
+    x = dp.arange(50, usm_type = usm_type_x)
+    y = dp.arange(50, usm_type = usm_type_y)
+
+    z = 20 - x - y - 7.4  # scalar on the left and on the right: covers reflected and regular subtraction
+
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_divide(usm_type_x, usm_type_y):  # division result must carry the USM type coerced from both array operands
+    x = dp.arange(120, usm_type = usm_type_x)
+    y = dp.arange(120, usm_type = usm_type_y)
+
+    z = 2 / x / y / 1.5  # NOTE(review): assuming arange starts at 0, this divides by zero; only usm_type is asserted below
+
     assert x.usm_type == usm_type_x
     assert y.usm_type == usm_type_y
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
@@ -61,3 +88,32 @@ def test_array_creation(func, args, usm_type_x, usm_type_y):
 
     assert x.usm_type == usm_type_x
     assert y.usm_type == usm_type_y
+
+
+@pytest.mark.skip()  # NOTE(review): skipped unconditionally with no reason given - consider adding reason="..."
+@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"])
+@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
+def test_tril_triu(func, usm_type):  # dp.tril / dp.triu output is expected to keep the input's USM type
+    x0 = dp.ones((3,3), usm_type=usm_type)
+    x = getattr(dp, func)(x0)  # resolve the function by name so one body covers both cases
+    assert x.usm_type == usm_type
+
+
+@pytest.mark.parametrize("op",
+                         ['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                          'logical_and', 'logical_or', 'logical_xor', 'not_equal'],
+                         ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal',
+                              'logical_and', 'logical_or', 'logical_xor', 'not_equal'])
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_logic_op(op, usm_type_x, usm_type_y):  # comparison/logical results must carry the coerced USM type
+    x = dp.arange(100, usm_type = usm_type_x)
+    y = dp.arange(100, usm_type = usm_type_y)[::-1]  # second operand is a reversed view
+
+    z = getattr(dp, op)(x, y)  # array with array
+    zx = getattr(dp, op)(x, 50)  # array with scalar
+    zy = getattr(dp, op)(30, y)  # scalar with array
+
+    assert x.usm_type == zx.usm_type == usm_type_x  # a scalar operand must not change the array's USM type
+    assert y.usm_type == zy.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
diff --git a/tests/third_party/cupy/creation_tests/test_basic.py b/tests/third_party/cupy/creation_tests/test_basic.py
index 337718d3caf3..1adcf98f969b 100644
--- a/tests/third_party/cupy/creation_tests/test_basic.py
+++ b/tests/third_party/cupy/creation_tests/test_basic.py
@@ -164,7 +164,7 @@ def test_empty_zero_sized_array_strides(self, order):
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
     def test_eye(self, xp, dtype):
-        return xp.eye(5, 4, 1, dtype)
+        return xp.eye(5, 4, k=1, dtype=dtype)
 
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
diff --git a/tests/third_party/cupy/creation_tests/test_from_data.py b/tests/third_party/cupy/creation_tests/test_from_data.py
index e07d927b1cf0..ce71ef311a56 100644
--- a/tests/third_party/cupy/creation_tests/test_from_data.py
+++ b/tests/third_party/cupy/creation_tests/test_from_data.py
@@ -454,6 +454,7 @@ def test_asfortranarray_cuda_array_zero_dim_dtype(
         a = xp.ones((), dtype=dtype_a)
         return xp.asfortranarray(a, dtype=dtype_b)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_fromfile(self, xp):
         with tempfile.TemporaryFile() as fh:
diff --git a/tests/third_party/cupy/creation_tests/test_matrix.py b/tests/third_party/cupy/creation_tests/test_matrix.py
index a5471f213ebf..fe144cbc58c4 100644
--- a/tests/third_party/cupy/creation_tests/test_matrix.py
+++ b/tests/third_party/cupy/creation_tests/test_matrix.py
@@ -140,6 +140,7 @@ def test_tri_posi(self, xp, dtype):
     {'shape': (2, 3, 4)},
 )
 @testing.gpu
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 class TestTriLowerAndUpper(unittest.TestCase):
 
     @testing.for_all_dtypes(no_complex=True)
@@ -148,7 +149,6 @@ def test_tril(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
         return xp.tril(m)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_tril_array_like(self, xp):
         return xp.tril([[1, 2], [3, 4]])
@@ -157,13 +157,13 @@ def test_tril_array_like(self, xp):
     @testing.numpy_cupy_array_equal()
     def test_tril_nega(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
-        return xp.tril(m, -1)
+        return xp.tril(m, k=-1)
 
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_array_equal()
     def test_tril_posi(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
-        return xp.tril(m, 1)
+        return xp.tril(m, k=1)
 
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_array_equal()
@@ -171,7 +171,6 @@ def test_triu(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
         return xp.triu(m)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_triu_array_like(self, xp):
         return xp.triu([[1, 2], [3, 4]])
@@ -180,10 +179,10 @@ def test_triu_array_like(self, xp):
     @testing.numpy_cupy_array_equal()
     def test_triu_nega(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
-        return xp.triu(m, -1)
+        return xp.triu(m, k=-1)
 
     @testing.for_all_dtypes(no_complex=True)
     @testing.numpy_cupy_array_equal()
     def test_triu_posi(self, xp, dtype):
         m = testing.shaped_arange(self.shape, xp, dtype)
-        return xp.triu(m, 1)
+        return xp.triu(m, k=1)
diff --git a/tests/third_party/cupy/creation_tests/test_ranges.py b/tests/third_party/cupy/creation_tests/test_ranges.py
index 75960e492c17..4d5bc03f81b0 100644
--- a/tests/third_party/cupy/creation_tests/test_ranges.py
+++ b/tests/third_party/cupy/creation_tests/test_ranges.py
@@ -54,7 +54,7 @@ def test_arange8(self, xp, dtype):
 
     def test_arange9(self):
         for xp in (numpy, cupy):
-            with pytest.raises(ValueError):
+            with pytest.raises((ValueError, TypeError)):
                 xp.arange(10, dtype=xp.bool_)
 
     @testing.numpy_cupy_array_equal()
diff --git a/tests/third_party/cupy/indexing_tests/test_generate.py b/tests/third_party/cupy/indexing_tests/test_generate.py
index d10e503bcec8..2bb0404ab599 100644
--- a/tests/third_party/cupy/indexing_tests/test_generate.py
+++ b/tests/third_party/cupy/indexing_tests/test_generate.py
@@ -28,7 +28,7 @@ def test_indices_list2(self, xp, dtype):
 
     def test_indices_list3(self):
         for xp in (numpy, cupy):
-            with pytest.raises(ValueError):
+            with pytest.raises((ValueError, TypeError)):
                 xp.indices((1, 2, 3, 4), dtype=xp.bool_)
 
 
diff --git a/tests/third_party/cupy/indexing_tests/test_insert.py b/tests/third_party/cupy/indexing_tests/test_insert.py
index ed6a156e8848..fdcc5357e19e 100644
--- a/tests/third_party/cupy/indexing_tests/test_insert.py
+++ b/tests/third_party/cupy/indexing_tests/test_insert.py
@@ -42,7 +42,7 @@ class TestPlaceRaises(unittest.TestCase):
     def test_place_empty_value_error(self, dtype):
         for xp in (numpy, cupy):
             a = testing.shaped_arange(self.shape, xp, dtype)
-            mask = testing.shaped_arange(self.shape, xp, numpy.int) % 2 == 0
+            mask = testing.shaped_arange(self.shape, xp, numpy.int_) % 2 == 0
             vals = testing.shaped_random((0,), xp, dtype)
             with pytest.raises(ValueError):
                 xp.place(a, mask, vals)
diff --git a/tests/third_party/cupy/logic_tests/test_comparison.py b/tests/third_party/cupy/logic_tests/test_comparison.py
index 0be9eaeee610..67848359188d 100644
--- a/tests/third_party/cupy/logic_tests/test_comparison.py
+++ b/tests/third_party/cupy/logic_tests/test_comparison.py
@@ -8,7 +8,6 @@
 from tests.third_party.cupy import testing
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestComparison(unittest.TestCase):
 
@@ -38,7 +37,6 @@ def test_equal(self):
         self.check_binary('equal')
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestComparisonOperator(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/logic_tests/test_ops.py b/tests/third_party/cupy/logic_tests/test_ops.py
index 55b8617882b1..cdbd035cd265 100644
--- a/tests/third_party/cupy/logic_tests/test_ops.py
+++ b/tests/third_party/cupy/logic_tests/test_ops.py
@@ -20,18 +20,14 @@ def check_binary(self, name, xp, dtype):
         b = testing.shaped_reverse_arange((2, 3), xp, dtype)
         return getattr(xp, name)(a, b)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_and(self):
         self.check_binary('logical_and')
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_or(self):
         self.check_binary('logical_or')
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_xor(self):
         self.check_binary('logical_xor')
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_logical_not(self):
         self.check_unary('logical_not')
diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py
index 28771b4979b5..027722d8bef2 100644
--- a/tests/third_party/cupy/math_tests/test_arithmetic.py
+++ b/tests/third_party/cupy/math_tests/test_arithmetic.py
@@ -1,5 +1,6 @@
 import itertools
 import unittest
+import warnings
 
 import numpy
 import pytest
@@ -130,8 +131,8 @@ def check_binary(self, xp):
 
         func = getattr(xp, self.name)
         with testing.NumpyError(divide='ignore'):
-            with numpy.warnings.catch_warnings():
-                numpy.warnings.filterwarnings('ignore')
+            with warnings.catch_warnings():
+                warnings.filterwarnings('ignore')
                 if self.use_dtype:
                     y = func(arg1, arg2, dtype=self.dtype)
                 else:
@@ -145,17 +146,14 @@ def check_binary(self, xp):
                 y = y.astype(numpy.complex64)
 
         # NumPy returns an output array of another type than DPNP when input ones have diffrent types.
-        if self.name == 'multiply' and xp is cupy:
-            if xp.isscalar(arg1) and xp.isscalar(arg2):
-                # If both are scalars, the result will be a scalar, so needs to convert into numpy-scalar.
-                y = numpy.asarray(y)
-            elif dtype1 != dtype2:
-                is_array_arg1 = not xp.isscalar(arg1)
-                is_array_arg2 = not xp.isscalar(arg2)
+        if xp is cupy and dtype1 != dtype2 and not self.use_dtype:
+            is_array_arg1 = not xp.isscalar(arg1)
+            is_array_arg2 = not xp.isscalar(arg2)
 
-                is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating)
-                is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type)
+            is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating)
+            is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type)
 
+            if self.name in ('add', 'multiply', 'subtract'):
                 if is_array_arg1 and is_array_arg2:
                     # If both inputs are arrays where one is of floating type and another - integer,
                     # NumPy will return an output array of always "float64" type,
@@ -170,6 +168,13 @@ def check_binary(self, xp):
                         y = y.astype(dtype1)
                     elif is_array_arg2 and not is_array_arg1:
                         y = y.astype(dtype2)
+            elif self.name in ('divide', 'true_divide'):
+                # If one input is a float32 array and the other is an integer or floating scalar,
+                # NumPy returns a float32 array, while DPNP returns a float64 array
+                # (the type NumPy itself would use if the scalar were an array of integer or floating type).
+                if not (is_array_arg1 and is_array_arg2):
+                    if (is_array_arg1 and arg1.dtype == numpy.float32) ^ (is_array_arg2 and arg2.dtype == numpy.float32):
+                        y = y.astype(numpy.float32)
 
         # NumPy returns different values (nan/inf) on division by zero
         # depending on the architecture.
@@ -187,7 +192,6 @@ def check_binary(self, xp):
 @testing.gpu
 @testing.parameterize(*(
     testing.product({
-        # TODO(unno): boolean subtract causes DeprecationWarning in numpy>=1.13
         'arg1': [testing.shaped_arange((2, 3), numpy, dtype=d)
                  for d in all_types
                  ] + [0, 0.0, 2, 2.0],
@@ -282,7 +286,6 @@ def test_modf(self, xp, dtype):
     'xp': [numpy, cupy],
     'shape': [(3, 2), (), (3, 0, 2)]
 }))
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestBoolSubtract(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/random_tests/test_sample.py b/tests/third_party/cupy/random_tests/test_sample.py
index 3f8a0169ac12..f3b844cdc6a5 100644
--- a/tests/third_party/cupy/random_tests/test_sample.py
+++ b/tests/third_party/cupy/random_tests/test_sample.py
@@ -33,7 +33,6 @@ def test_lo_hi_nonrandom(self):
         a = random.randint(-1.1, -0.9, size=(2, 2))
         numpy.testing.assert_array_equal(a, cupy.full((2, 2), -1))
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_zero_sizes(self):
         a = random.randint(10, size=(0,))
         numpy.testing.assert_array_equal(a, cupy.array(()))
@@ -112,7 +111,6 @@ def test_goodness_of_fit_2(self):
         self.assertTrue(hypothesis.chi_square_test(counts, expected))
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestRandintDtype(unittest.TestCase):
 
diff --git a/tests/third_party/cupy/statistics_tests/test_meanvar.py b/tests/third_party/cupy/statistics_tests/test_meanvar.py
index aea22d02c511..60d3413b0daa 100644
--- a/tests/third_party/cupy/statistics_tests/test_meanvar.py
+++ b/tests/third_party/cupy/statistics_tests/test_meanvar.py
@@ -89,7 +89,6 @@ def test_median_axis_sequence(self, xp, dtype):
         return xp.median(a, self.axis, keepdims=self.keepdims)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestAverage(unittest.TestCase):
 
@@ -101,12 +100,14 @@ def test_average_all(self, xp, dtype):
         a = testing.shaped_arange((2, 3), xp, dtype)
         return xp.average(a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_average_axis(self, xp, dtype):
         a = testing.shaped_arange((2, 3, 4), xp, dtype)
         return xp.average(a, axis=1)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_average_weights(self, xp, dtype):
@@ -114,6 +115,7 @@ def test_average_weights(self, xp, dtype):
         w = testing.shaped_arange((2, 3), xp, dtype)
         return xp.average(a, weights=w)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_average_axis_weights(self, xp, dtype):
@@ -132,6 +134,7 @@ def check_returned(self, a, axis, weights):
         testing.assert_allclose(average_cpu, average_gpu)
         testing.assert_allclose(sum_weights_cpu, sum_weights_gpu)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.for_all_dtypes()
     def test_returned(self, dtype):
         a = testing.shaped_arange((2, 3), numpy, dtype)
diff --git a/tests_external/skipped_tests_numpy.tbl b/tests_external/skipped_tests_numpy.tbl
index 30b66da5e663..c2c0dc78ec54 100644
--- a/tests_external/skipped_tests_numpy.tbl
+++ b/tests_external/skipped_tests_numpy.tbl
@@ -318,83 +318,6 @@ tests/test_datetime.py::TestDateTime::test_timedelta_np_int_construction[Y]
 tests/test_datetime.py::TestDateTime::test_timedelta_object_array_conversion
 tests/test_datetime.py::TestDateTime::test_timedelta_scalar_construction
 tests/test_datetime.py::TestDateTime::test_timedelta_scalar_construction_units
-tests/test_defchararray.py::TestBasic::test_from_object_array
-tests/test_defchararray.py::TestBasic::test_from_object_array_unicode
-tests/test_defchararray.py::TestBasic::test_from_string
-tests/test_defchararray.py::TestBasic::test_from_string_array
-tests/test_defchararray.py::TestBasic::test_from_unicode
-tests/test_defchararray.py::TestBasic::test_from_unicode_array
-tests/test_defchararray.py::TestBasic::test_unicode_upconvert
-tests/test_defchararray.py::TestChar::test_it
-tests/test_defchararray.py::TestComparisonsMixed1::test_equal
-tests/test_defchararray.py::TestComparisonsMixed1::test_greater
-tests/test_defchararray.py::TestComparisonsMixed1::test_greater_equal
-tests/test_defchararray.py::TestComparisonsMixed1::test_less
-tests/test_defchararray.py::TestComparisonsMixed1::test_less_equal
-tests/test_defchararray.py::TestComparisonsMixed1::test_not_equal
-tests/test_defchararray.py::TestComparisonsMixed2::test_equal
-tests/test_defchararray.py::TestComparisonsMixed2::test_greater
-tests/test_defchararray.py::TestComparisonsMixed2::test_greater_equal
-tests/test_defchararray.py::TestComparisonsMixed2::test_less
-tests/test_defchararray.py::TestComparisonsMixed2::test_less_equal
-tests/test_defchararray.py::TestComparisonsMixed2::test_not_equal
-tests/test_defchararray.py::TestComparisons::test_equal
-tests/test_defchararray.py::TestComparisons::test_greater
-tests/test_defchararray.py::TestComparisons::test_greater_equal
-tests/test_defchararray.py::TestComparisons::test_less
-tests/test_defchararray.py::TestComparisons::test_less_equal
-tests/test_defchararray.py::TestComparisons::test_not_equal
-tests/test_defchararray.py::test_empty_indexing
-tests/test_defchararray.py::TestInformation::test_count
-tests/test_defchararray.py::TestInformation::test_endswith
-tests/test_defchararray.py::TestInformation::test_find
-tests/test_defchararray.py::TestInformation::test_index
-tests/test_defchararray.py::TestInformation::test_isalnum
-tests/test_defchararray.py::TestInformation::test_isalpha
-tests/test_defchararray.py::TestInformation::test_isdigit
-tests/test_defchararray.py::TestInformation::test_islower
-tests/test_defchararray.py::TestInformation::test_isspace
-tests/test_defchararray.py::TestInformation::test_istitle
-tests/test_defchararray.py::TestInformation::test_isupper
-tests/test_defchararray.py::TestInformation::test_len
-tests/test_defchararray.py::TestInformation::test_rfind
-tests/test_defchararray.py::TestInformation::test_rindex
-tests/test_defchararray.py::TestInformation::test_startswith
-tests/test_defchararray.py::TestMethods::test_capitalize
-tests/test_defchararray.py::TestMethods::test_center
-tests/test_defchararray.py::TestMethods::test_decode
-tests/test_defchararray.py::TestMethods::test_encode
-tests/test_defchararray.py::TestMethods::test_expandtabs
-tests/test_defchararray.py::TestMethods::test_isdecimal
-tests/test_defchararray.py::TestMethods::test_isnumeric
-tests/test_defchararray.py::TestMethods::test_join
-tests/test_defchararray.py::TestMethods::test_ljust
-tests/test_defchararray.py::TestMethods::test_lower
-tests/test_defchararray.py::TestMethods::test_lstrip
-tests/test_defchararray.py::TestMethods::test_partition
-tests/test_defchararray.py::TestMethods::test_replace
-tests/test_defchararray.py::TestMethods::test_rjust
-tests/test_defchararray.py::TestMethods::test_rpartition
-tests/test_defchararray.py::TestMethods::test_rsplit
-tests/test_defchararray.py::TestMethods::test_rstrip
-tests/test_defchararray.py::TestMethods::test_split
-tests/test_defchararray.py::TestMethods::test_splitlines
-tests/test_defchararray.py::TestMethods::test_strip
-tests/test_defchararray.py::TestMethods::test_swapcase
-tests/test_defchararray.py::TestMethods::test_title
-tests/test_defchararray.py::TestMethods::test_upper
-tests/test_defchararray.py::TestOperations::test_add
-tests/test_defchararray.py::TestOperations::test_mod
-tests/test_defchararray.py::TestOperations::test_mul
-tests/test_defchararray.py::TestOperations::test_radd
-tests/test_defchararray.py::TestOperations::test_rmod
-tests/test_defchararray.py::TestOperations::test_rmul
-tests/test_defchararray.py::TestOperations::test_slice
-tests/test_defchararray.py::TestVecString::test_invalid_args_tuple
-tests/test_defchararray.py::TestVecString::test_invalid_function_args
-tests/test_defchararray.py::TestVecString::test_invalid_result_type
-tests/test_defchararray.py::TestVecString::test_non_string_array
-tests/test_defchararray.py::TestWhitespace::test1
 tests/test_deprecations.py::TestAlen::test_alen
 tests/test_deprecations.py::TestArrayDataAttributeAssignmentDeprecation::test_data_attr_assignment
 tests/test_deprecations.py::TestBinaryReprInsufficientWidthParameterForRepresentation::test_insufficient_width_negative
diff --git a/utils/command_build_clib.py b/utils/command_build_clib.py
index 95887cc65aaa..d16bab3aec4a 100644
--- a/utils/command_build_clib.py
+++ b/utils/command_build_clib.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -63,7 +63,7 @@
 # default variables (for Linux)
 _project_compiler = "icpx"
 _project_linker = "icpx"
-_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func"]
+_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func", "-fno-finite-math-only"]
 _project_cmplr_flag_sycl = ["-fsycl"]
 _project_cmplr_flag_stdcpp_static = []  # This brakes TBB ["-static-libstdc++", "-static-libgcc"]
 _project_cmplr_flag_compatibility = ["-Wl,--enable-new-dtags"]