From 684f3938dd99eb41a5b95231c9f6a748f8b1b933 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Tue, 7 Mar 2023 01:04:34 +0100
Subject: [PATCH 001/129] Add dpnp.broadcast_to() function (#1333)

---
 dpnp/dpnp_iface_manipulation.py |  44 ++++++++
 tests/skipped_tests.tbl         |   7 +-
 tests/skipped_tests_gpu.tbl     |   7 +-
 tests/test_arraymanipulation.py | 173 +++++++++++++++++++++++++-------
 tests/test_sycl_queue.py        |  12 ++-
 tests/test_usm_type.py          |   8 ++
 6 files changed, 201 insertions(+), 50 deletions(-)

diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index adc2bdf15f31..567661bdb57f 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -47,7 +47,10 @@
 from dpnp.dpnp_iface_arraycreation import array
 
 import dpnp
+from dpnp.dpnp_array import dpnp_array
+
 import numpy
+import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -55,6 +58,7 @@
     "atleast_1d",
     "atleast_2d",
     "atleast_3d",
+    "broadcast_to",
     "concatenate",
     "copyto",
     "expand_dims",
@@ -190,6 +194,46 @@ def atleast_3d(*arys):
     return call_origin(numpy.atleast_3d, *arys)
 
 
+def broadcast_to(x, /, shape, subok=False):
+    """
+    Broadcast an array to a new shape.
+
+    For full documentation refer to :obj:`numpy.broadcast_to`.
+
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array of the requested shape with the same data type as `x`.
+
+    Limitations
+    -----------
+    Parameter `x` is supported as either :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`.
+    Parameter `subok` is supported with default value.
+    Otherwise the function will be executed sequentially on CPU.
+    Input array data types are limited by supported DPNP :ref:`Data types`.
+
+    Examples
+    --------
+    >>> import dpnp as dp
+    >>> x = dp.array([1, 2, 3])
+    >>> dp.broadcast_to(x, (3, 3))
+    array([[1, 2, 3],
+           [1, 2, 3],
+           [1, 2, 3]])
+
+    """
+
+    # Default `subok` with a dpnp/usm_ndarray input goes through dpctl.tensor.
+    if subok is False and isinstance(x, (dpnp_array, dpt.usm_ndarray)):
+        usm_x = x.get_array() if isinstance(x, dpnp_array) else x
+        usm_res = dpt.broadcast_to(usm_x, shape)
+        return dpnp_array._create_from_usm_ndarray(usm_res)
+
+    # Every other combination is handed to `call_origin` with NumPy's function.
+    return call_origin(numpy.broadcast_to, x, shape=shape, subok=subok)
+
+
 def concatenate(arrs, axis=0, out=None, dtype=None, casting="same_kind"):
     """
     Join a sequence of arrays along an existing axis.
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 26dd6fc59cd7..bda10cfd4973 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -648,12 +648,7 @@ tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{s
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{shapes=[(2, 0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_fail
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_fail_numpy19
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_numpy19
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_short_shape
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_short_shape_numpy19
+
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure1
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure2
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure1
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 7e9b9e5505de..3e0026759334 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -853,12 +853,7 @@ tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{s
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{shapes=[(2, 0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_fail
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_fail_numpy19
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_numpy19
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_short_shape
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_broadcast_to_short_shape_numpy19
+
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure1
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure2
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure1
diff --git a/tests/test_arraymanipulation.py b/tests/test_arraymanipulation.py
index f22e8175c3b2..6a2b452917b0 100644
--- a/tests/test_arraymanipulation.py
+++ b/tests/test_arraymanipulation.py
@@ -2,7 +2,16 @@
 from .helper import get_all_dtypes
 
 import dpnp
+
 import numpy
+from numpy.testing import (
+    assert_,
+    assert_allclose,
+    assert_array_equal,
+    assert_equal,
+    assert_raises,
+    assert_warns
+)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -14,7 +23,7 @@ def test_asfarray(dtype, data):
     expected = numpy.asfarray(data, dtype)
     result = dpnp.asfarray(data, dtype)
 
-    numpy.testing.assert_array_equal(result, expected)
+    assert_array_equal(result, expected)
 
 
 @pytest.mark.parametrize("dtype", get_all_dtypes())
@@ -24,7 +33,99 @@ def test_asfarray2(dtype, data, data_dtype):
     expected = numpy.asfarray(numpy.array(data, dtype=data_dtype), dtype)
     result = dpnp.asfarray(dpnp.array(data, dtype=data_dtype), dtype)
 
-    numpy.testing.assert_array_equal(result, expected)
+    assert_array_equal(result, expected)
+
+
+class TestDims:
+    """Check dpnp.broadcast_to against numpy.broadcast_to."""
+
+    @pytest.mark.parametrize("dt", get_all_dtypes())
+    @pytest.mark.parametrize(
+        "sh", [(0,), (1,), (3,)], ids=['(0,)', '(1,)', '(3,)'])
+    def test_broadcast_array(self, sh, dt):
+        np_a = numpy.array(0, dtype=dt)
+        dp_a = dpnp.array(0, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize("dt", get_all_dtypes())
+    @pytest.mark.parametrize(
+        "sh", [(1,), (2,), (1, 2, 3)], ids=['(1,)', '(2,)', '(1, 2, 3)'])
+    def test_broadcast_ones(self, sh, dt):
+        np_a = numpy.ones(1, dtype=dt)
+        dp_a = dpnp.ones(1, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize("dt", get_all_dtypes(no_bool=True))
+    @pytest.mark.parametrize(
+        "sh", [(3,), (1, 3), (2, 3)], ids=['(3,)', '(1, 3)', '(2, 3)'])
+    def test_broadcast_arange(self, sh, dt):
+        np_a = numpy.arange(3, dtype=dt)
+        dp_a = dpnp.arange(3, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize("dt", get_all_dtypes())
+    @pytest.mark.parametrize(
+        "sh1, sh2",
+        [
+            pytest.param([0], [0], id="(0)"),
+            pytest.param([1], [1], id="(1)"),
+            pytest.param([1], [2], id="(2)"),
+        ],
+    )
+    def test_broadcast_not_tuple(self, sh1, sh2, dt):
+        np_a = numpy.ones(sh1, dtype=dt)
+        dp_a = dpnp.ones(sh1, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh2)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize("dt", get_all_dtypes())
+    @pytest.mark.parametrize(
+        "sh1, sh2",
+        [
+            pytest.param([1], (0,), id="(0,)"),
+            pytest.param((1, 2), (0, 2), id="(0, 2)"),
+            pytest.param((2, 1), (2, 0), id="(2, 0)"),
+        ],
+    )
+    def test_broadcast_zero_shape(self, sh1, sh2, dt):
+        np_a = numpy.ones(sh1, dtype=dt)
+        dp_a = dpnp.ones(sh1, dtype=dt)
+        func = lambda xp, a: xp.broadcast_to(a, sh2)
+
+        assert_allclose(func(numpy, np_a), func(dpnp, dp_a))
+
+    @pytest.mark.parametrize(
+        "sh1, sh2",
+        [
+            pytest.param((0,), (), id="(0,)-()"),
+            pytest.param((1,), (), id="(1,)-()"),
+            pytest.param((3,), (), id="(3,)-()"),
+            pytest.param((3,), (1,), id="(3,)-(1,)"),
+            pytest.param((3,), (2,), id="(3,)-(2,)"),
+            pytest.param((3,), (4,), id="(3,)-(4,)"),
+            pytest.param((1, 2), (2, 1), id="(1, 2)-(2, 1)"),
+            pytest.param((1, 2), (1,), id="(1, 2)-(1,)"),
+            pytest.param((1,), -1, id="(1,)--1"),
+            pytest.param((1,), (-1,), id="(1,)-(-1,)"),
+            pytest.param((1, 2), (-1, 2), id="(1, 2)-(-1, 2)"),
+        ],
+    )
+    def test_broadcast_raise(self, sh1, sh2):
+        np_a = numpy.zeros(sh1)
+        dp_a = dpnp.zeros(sh1)
+
+        # One context per call; a shared one would never reach the dpnp call.
+        with pytest.raises(ValueError):
+            numpy.broadcast_to(np_a, sh2)
+        with pytest.raises(ValueError):
+            dpnp.broadcast_to(dp_a, sh2)
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -38,62 +139,62 @@ def test_returns_copy(self):
     def test_large_concatenate_axis_None(self):
         x = dpnp.arange(1, 100)
         r = dpnp.concatenate(x, None)
-        numpy.testing.assert_array_equal(x, r)
+        assert_array_equal(x, r)
         r = dpnp.concatenate(x, 100)
-        numpy.testing.assert_array_equal(x, r)
+        assert_array_equal(x, r)
 
     def test_concatenate(self):
         # Test concatenate function
         # One sequence returns unmodified (but as array)
         r4 = list(range(4))
-        numpy.testing.assert_array_equal(dpnp.concatenate((r4,)), r4)
+        assert_array_equal(dpnp.concatenate((r4,)), r4)
         # Any sequence
-        numpy.testing.assert_array_equal(dpnp.concatenate((tuple(r4),)), r4)
-        numpy.testing.assert_array_equal(dpnp.concatenate((dpnp.array(r4),)), r4)
+        assert_array_equal(dpnp.concatenate((tuple(r4),)), r4)
+        assert_array_equal(dpnp.concatenate((dpnp.array(r4),)), r4)
         # 1D default concatenation
         r3 = list(range(3))
-        numpy.testing.assert_array_equal(dpnp.concatenate((r4, r3)), r4 + r3)
+        assert_array_equal(dpnp.concatenate((r4, r3)), r4 + r3)
         # Mixed sequence types
-        numpy.testing.assert_array_equal(dpnp.concatenate((tuple(r4), r3)), r4 + r3)
-        numpy.testing.assert_array_equal(
+        assert_array_equal(dpnp.concatenate((tuple(r4), r3)), r4 + r3)
+        assert_array_equal(
             dpnp.concatenate((dpnp.array(r4), r3)), r4 + r3
         )
         # Explicit axis specification
-        numpy.testing.assert_array_equal(dpnp.concatenate((r4, r3), 0), r4 + r3)
+        assert_array_equal(dpnp.concatenate((r4, r3), 0), r4 + r3)
         # Including negative
-        numpy.testing.assert_array_equal(dpnp.concatenate((r4, r3), -1), r4 + r3)
+        assert_array_equal(dpnp.concatenate((r4, r3), -1), r4 + r3)
         # 2D
         a23 = dpnp.array([[10, 11, 12], [13, 14, 15]])
         a13 = dpnp.array([[0, 1, 2]])
         res = dpnp.array([[10, 11, 12], [13, 14, 15], [0, 1, 2]])
-        numpy.testing.assert_array_equal(dpnp.concatenate((a23, a13)), res)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a23, a13), 0), res)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a23.T, a13.T), 1), res.T)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a23.T, a13.T), -1), res.T)
+        assert_array_equal(dpnp.concatenate((a23, a13)), res)
+        assert_array_equal(dpnp.concatenate((a23, a13), 0), res)
+        assert_array_equal(dpnp.concatenate((a23.T, a13.T), 1), res.T)
+        assert_array_equal(dpnp.concatenate((a23.T, a13.T), -1), res.T)
         # Arrays much match shape
-        numpy.testing.assert_raises(ValueError, dpnp.concatenate, (a23.T, a13.T), 0)
+        assert_raises(ValueError, dpnp.concatenate, (a23.T, a13.T), 0)
         # 3D
         res = dpnp.reshape(dpnp.arange(2 * 3 * 7), (2, 3, 7))
         a0 = res[..., :4]
         a1 = res[..., 4:6]
         a2 = res[..., 6:]
-        numpy.testing.assert_array_equal(dpnp.concatenate((a0, a1, a2), 2), res)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a0, a1, a2), -1), res)
-        numpy.testing.assert_array_equal(dpnp.concatenate((a0.T, a1.T, a2.T), 0), res.T)
+        assert_array_equal(dpnp.concatenate((a0, a1, a2), 2), res)
+        assert_array_equal(dpnp.concatenate((a0, a1, a2), -1), res)
+        assert_array_equal(dpnp.concatenate((a0.T, a1.T, a2.T), 0), res.T)
 
         out = dpnp.copy(res)
         rout = dpnp.concatenate((a0, a1, a2), 2, out=out)
-        numpy.testing.assert_(out is rout)
-        numpy.testing.assert_equal(res, rout)
+        assert_(out is rout)
+        assert_equal(res, rout)
 
 
 class TestHstack:
     def test_non_iterable(self):
-        numpy.testing.assert_raises(TypeError, dpnp.hstack, 1)
+        assert_raises(TypeError, dpnp.hstack, 1)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_empty_input(self):
-        numpy.testing.assert_raises(ValueError, dpnp.hstack, ())
+        assert_raises(ValueError, dpnp.hstack, ())
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_0D_array(self):
@@ -101,7 +202,7 @@ def test_0D_array(self):
         a = dpnp.array(1)
         res = dpnp.hstack([a, b])
         desired = dpnp.array([1, 2])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_1D_array(self):
@@ -109,7 +210,7 @@ def test_1D_array(self):
         b = dpnp.array([2])
         res = dpnp.hstack([a, b])
         desired = dpnp.array([1, 2])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_2D_array(self):
@@ -117,22 +218,22 @@ def test_2D_array(self):
         b = dpnp.array([[1], [2]])
         res = dpnp.hstack([a, b])
         desired = dpnp.array([[1, 1], [2, 2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     def test_generator(self):
-        with numpy.testing.assert_warns(FutureWarning):
+        with assert_warns(FutureWarning):
             dpnp.hstack((numpy.arange(3) for _ in range(2)))
-        with numpy.testing.assert_warns(FutureWarning):
+        with assert_warns(FutureWarning):
             dpnp.hstack(map(lambda x: x, numpy.ones((3, 2))))
 
 
 class TestVstack:
     def test_non_iterable(self):
-        numpy.testing.assert_raises(TypeError, dpnp.vstack, 1)
+        assert_raises(TypeError, dpnp.vstack, 1)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_empty_input(self):
-        numpy.testing.assert_raises(ValueError, dpnp.vstack, ())
+        assert_raises(ValueError, dpnp.vstack, ())
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_0D_array(self):
@@ -140,7 +241,7 @@ def test_0D_array(self):
         b = dpnp.array(2)
         res = dpnp.vstack([a, b])
         desired = dpnp.array([[1], [2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_1D_array(self):
@@ -148,7 +249,7 @@ def test_1D_array(self):
         b = dpnp.array([2])
         res = dpnp.vstack([a, b])
         desired = dpnp.array([[1], [2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_2D_array(self):
@@ -156,7 +257,7 @@ def test_2D_array(self):
         b = dpnp.array([[1], [2]])
         res = dpnp.vstack([a, b])
         desired = dpnp.array([[1], [2], [1], [2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_2D_array2(self):
@@ -164,8 +265,8 @@ def test_2D_array2(self):
         b = dpnp.array([1, 2])
         res = dpnp.vstack([a, b])
         desired = dpnp.array([[1, 2], [1, 2]])
-        numpy.testing.assert_array_equal(res, desired)
+        assert_array_equal(res, desired)
 
     def test_generator(self):
-        with numpy.testing.assert_warns(FutureWarning):
+        with assert_warns(FutureWarning):
             dpnp.vstack((numpy.arange(3) for _ in range(2)))
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 42cbe7459513..a523c46465bf 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -927,8 +927,7 @@ def test_from_dlpack(arr_dtype, shape, device):
 @pytest.mark.parametrize("device",
                          valid_devices,
                          ids=[device.filter_string for device in valid_devices])
-#TODO need to delete no_bool=True when use dlpack > 0.7 version
-@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True))
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True))
 def test_from_dlpack_with_dpt(arr_dtype, device):
     X = dpctl.tensor.empty((64,), dtype=arr_dtype, device=device)
     Y = dpnp.from_dlpack(X)
@@ -937,3 +936,12 @@ def test_from_dlpack_with_dpt(arr_dtype, device):
     assert X.__dlpack_device__() == Y.__dlpack_device__()
     assert X.usm_type == Y.usm_type
     assert_sycl_queue_equal(X.sycl_queue, Y.sycl_queue)
+
+
+@pytest.mark.parametrize(
+    "device", valid_devices, ids=[d.filter_string for d in valid_devices])
+def test_broadcast_to(device):
+    """Broadcast result must keep the SYCL queue of the source array."""
+    src = dpnp.arange(5, device=device)
+    res = dpnp.broadcast_to(src, (3, 5))
+    assert_sycl_queue_equal(src.sycl_queue, res.sycl_queue)
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 96d55f6875c5..b0efa89968bd 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -155,6 +155,7 @@ def test_meshgrid(usm_type_x, usm_type_y):
     assert z[0].usm_type == usm_type_x
     assert z[1].usm_type == usm_type_y
 
+
 @pytest.mark.parametrize(
     "func,data1,data2",
     [
@@ -173,3 +174,10 @@ def test_2in_1out(func, data1, data2, usm_type_x, usm_type_y):
     assert x.usm_type == usm_type_x
     assert y.usm_type == usm_type_y
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
+@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
+def test_broadcast_to(usm_type):
+    # The broadcast result must report the same USM type as its source.
+    src = dp.ones(7, usm_type=usm_type)
+    assert src.usm_type == dp.broadcast_to(src, (2, 7)).usm_type

From 29a206329d5b731710d0bc6622af861d508862cf Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Tue, 7 Mar 2023 13:13:55 +0100
Subject: [PATCH 002/129] Add support of bool type in bitwise operations
 (#1334)

* Add support of bool type in bitwise operations

* Update dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
---
 .../include/dpnp_gen_2arg_1type_tbl.hpp       |   4 +-
 dpnp/backend/kernels/dpnp_krnl_bitwise.cpp    | 110 ++++++++--
 dpnp/dpnp_algo/dpnp_algo_bitwise.pyx          |   6 +-
 dpnp/dpnp_array.py                            |  69 ++++--
 dpnp/dpnp_iface.py                            |   3 +-
 dpnp/dpnp_iface_bitwise.py                    | 196 +++++++++++-------
 dpnp/dpnp_iface_logic.py                      |  30 ++-
 dpnp/dpnp_iface_mathematical.py               |  16 +-
 tests/helper.py                               |   8 +-
 tests/skipped_tests.tbl                       |   7 +-
 tests/skipped_tests_gpu.tbl                   |   7 +-
 tests/test_bitwise.py                         |  87 +++++---
 tests/test_usm_type.py                        |  18 ++
 .../cupy/binary_tests/test_elementwise.py     |   5 +-
 14 files changed, 401 insertions(+), 165 deletions(-)

diff --git a/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp
index 19589c4b0eeb..0330faeee370 100644
--- a/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -104,7 +104,7 @@
 
 #endif
 
-MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem& input2_elem)
+MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem & input2_elem)
 MACRO_2ARG_1TYPE_OP(dpnp_bitwise_or_c, input1_elem | input2_elem)
 MACRO_2ARG_1TYPE_OP(dpnp_bitwise_xor_c, input1_elem ^ input2_elem)
 MACRO_2ARG_1TYPE_OP(dpnp_left_shift_c, input1_elem << input2_elem)
diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
index c082bd636bf9..f3d8a4a95ccd 100644
--- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
@@ -27,6 +27,7 @@
 
 #include "dpnp_fptr.hpp"
 #include "dpnp_iface.hpp"
+#include "dpnp_iterator.hpp"
 #include "dpnp_utils.hpp"
 #include "dpnpc_memory_adapter.hpp"
 #include "queue_sycl.hpp"
@@ -49,27 +50,66 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
     sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
     sycl::event event;
 
-    DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size);
-    _DataType* array1 = input1_ptr.get_ptr();
-    _DataType* result = reinterpret_cast<_DataType*>(result1);
+    _DataType* input_data = static_cast<_DataType*>(array1_in);
+    _DataType* result = static_cast<_DataType*>(result1);
 
-    sycl::range<1> gws(size);
-    auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
-        size_t i = global_id[0]; /*for (size_t i = 0; i < size; ++i)*/
+    constexpr size_t lws = 64;
+    constexpr unsigned int vec_sz = 8;
+
+    auto gws_range = sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
+    auto lws_range = sycl::range<1>(lws);
+
+    auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
+        auto sg = nd_it.get_sub_group();
+        const auto max_sg_size = sg.get_max_local_range()[0];
+        const size_t start =
+            vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size);
+
+        if (start + static_cast<size_t>(vec_sz) * max_sg_size < size)
         {
-            _DataType input_elem1 = array1[i];
-            result[i] = ~input_elem1;
+            using multi_ptrT = sycl::multi_ptr<_DataType, sycl::access::address_space::global_space>;
+
+            sycl::vec<_DataType, vec_sz> x = sg.load<vec_sz>(multi_ptrT(&input_data[start]));
+            sycl::vec<_DataType, vec_sz> res_vec;
+
+            if constexpr (std::is_same_v<_DataType, bool>)
+            {
+#pragma unroll
+                for (size_t k = 0; k < vec_sz; ++k)
+                {
+                    res_vec[k] = !(x[k]);
+                }
+            }
+            else
+            {
+                res_vec = ~x;
+            }
+
+            sg.store<vec_sz>(multi_ptrT(&result[start]), res_vec);
+        }
+        else
+        {
+            for (size_t k = start + sg.get_local_id()[0]; k < size; k += max_sg_size)
+            {
+                if constexpr (std::is_same_v<_DataType, bool>)
+                {
+                    result[k] = !(input_data[k]);
+                }
+                else
+                {
+                    result[k] = ~(input_data[k]);
+                }
+            }
         }
     };
 
     auto kernel_func = [&](sycl::handler& cgh) {
-        cgh.parallel_for<class dpnp_invert_c_kernel<_DataType>>(gws, kernel_parallel_for_func);
+        cgh.parallel_for<class dpnp_invert_c_kernel<_DataType>>(sycl::nd_range<1>(gws_range, lws_range),
+                                                                kernel_parallel_for_func);
     };
-
     event = q.submit(kernel_func);
 
     event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
-
     return DPCTLEvent_Copy(event_ref);
 }
 
@@ -84,6 +124,7 @@ void dpnp_invert_c(void* array1_in, void* result1, size_t size)
                                                            size,
                                                            dep_event_vec_ref);
     DPCTLEvent_WaitAndThrow(event_ref);
+    DPCTLEvent_Delete(event_ref);
 }
 
 template <typename _DataType>
@@ -98,9 +139,11 @@ DPCTLSyclEventRef (*dpnp_invert_ext_c)(DPCTLSyclQueueRef,
 
 static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
 {
+    fmap[DPNPFuncName::DPNP_FN_INVERT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_default_c<bool>};
     fmap[DPNPFuncName::DPNP_FN_INVERT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_INVERT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_default_c<int64_t>};
 
+    fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_ext_c<bool>};
     fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_ext_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_ext_c<int64_t>};
 
@@ -114,6 +157,9 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
     template <typename _KernelNameSpecialization>                                                                      \
     class __name__##_strides_kernel;                                                                                   \
                                                                                                                        \
+    template <typename _KernelNameSpecialization>                                                                      \
+    class __name__##_broadcast_kernel;                                                                                 \
+                                                                                                                       \
     template <typename _DataType>                                                                                      \
     DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref,                                                                \
                                void* result_out,                                                                       \
@@ -152,6 +198,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
         _DataType* input2_data = static_cast<_DataType*>(const_cast<void*>(input2_in));                                \
         _DataType* result = static_cast<_DataType*>(result_out);                                                       \
                                                                                                                        \
+        bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim);                    \
+                                                                                                                       \
         shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
                                                                                                                        \
         get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
@@ -167,7 +215,42 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
         sycl::event event;                                                                                             \
         sycl::range<1> gws(result_size);                                                                               \
                                                                                                                        \
-        if (use_strides)                                                                                               \
+        if (use_broadcasting)                                                                                          \
+        {                                                                                                              \
+            DPNPC_id<_DataType>* input1_it;                                                                            \
+            const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType>);                                        \
+            input1_it = reinterpret_cast<DPNPC_id<_DataType>*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes));   \
+            new (input1_it) DPNPC_id<_DataType>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim);        \
+                                                                                                                       \
+            input1_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
+                                                                                                                       \
+            DPNPC_id<_DataType>* input2_it;                                                                            \
+            const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType>);                                        \
+            input2_it = reinterpret_cast<DPNPC_id<_DataType>*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes));   \
+            new (input2_it) DPNPC_id<_DataType>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim);        \
+                                                                                                                       \
+            input2_it->broadcast_to_shape(result_shape, result_ndim);                                                  \
+                                                                                                                       \
+            auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
+                const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                          \
+                {                                                                                                      \
+                    const _DataType input1_elem = (*input1_it)[i];                                                     \
+                    const _DataType input2_elem = (*input2_it)[i];                                                     \
+                    result[i] = __operation__;                                                                         \
+                }                                                                                                      \
+            };                                                                                                         \
+            auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.parallel_for<class __name__##_broadcast_kernel<_DataType>>(gws, kernel_parallel_for_func);         \
+            };                                                                                                         \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            input1_it->~DPNPC_id();                                                                                    \
+            input2_it->~DPNPC_id();                                                                                    \
+                                                                                                                       \
+            return event_ref;                                                                                          \
+        }                                                                                                              \
+        else if (use_strides)                                                                                          \
         {                                                                                                              \
             if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim))                                          \
             {                                                                                                          \
@@ -332,18 +415,21 @@ static void func_map_init_bitwise_2arg_1type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_default<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_default<int64_t>};
 
+    fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_and_c_ext<bool>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_ext<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_ext<int64_t>};
 
     fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_default<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_default<int64_t>};
 
+    fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_or_c_ext<bool>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_ext<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_ext<int64_t>};
 
     fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_default<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_default<int64_t>};
 
+    fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_xor_c_ext<bool>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_ext<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_ext<int64_t>};
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
index 482f00c2c71d..a8af53b709d1 100644
--- a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -68,8 +68,8 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_xor(utils.dpnp_descriptor x1_obj,
     return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_XOR_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)
 
 
-cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr):
-    return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape)
+cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr, utils.dpnp_descriptor out=None):
+    return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape, out=out, func_name="invert")
 
 
 cpdef utils.dpnp_descriptor dpnp_left_shift(utils.dpnp_descriptor x1_obj,
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index d1ad1252d4ec..f2ccf56ef76b 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -125,7 +125,9 @@ def __abs__(self):
     def __add__(self, other):
         return dpnp.add(self, other)
 
- # '__and__',
+    def __and__(self, other):
+        return dpnp.bitwise_and(self, other)
+
  # '__array__',
  # '__array_finalize__',
  # '__array_function__',
@@ -193,9 +195,17 @@ def __gt__(self, other):
 
  # '__hash__',
  # '__iadd__',
- # '__iand__',
+
+    def __iand__(self, other):
+        dpnp.bitwise_and(self, other, out=self)
+        return self
+
  # '__ifloordiv__',
- # '__ilshift__',
+
+    def __ilshift__(self, other):
+        dpnp.left_shift(self, other, out=self)
+        return self
+
  # '__imatmul__',
  # '__imod__',
  # '__imul__',
@@ -209,18 +219,28 @@ def __index__(self):
     def __int__(self):
         return self._array_obj.__int__()
 
- # '__invert__',
- # '__ior__',
+    def __invert__(self):
+        return dpnp.invert(self)
+
+    def __ior__(self, other):
+        dpnp.bitwise_or(self, other, out=self)
+        return self
 
     def __ipow__(self, other):
         dpnp.power(self, other, out=self)
         return self
 
- # '__irshift__',
+    def __irshift__(self, other):
+        dpnp.right_shift(self, other, out=self)
+        return self
+
  # '__isub__',
  # '__iter__',
  # '__itruediv__',
- # '__ixor__',
+
+    def __ixor__(self, other):
+        dpnp.bitwise_xor(self, other, out=self)
+        return self
 
     def __le__(self, other):
         return dpnp.less_equal(self, other)
@@ -232,7 +252,8 @@ def __len__(self):
 
         return self._array_obj.__len__()
 
- # '__lshift__',
+    def __lshift__(self, other):
+        return dpnp.left_shift(self, other)
 
     def __lt__(self, other):
         return dpnp.less(self, other)
@@ -253,7 +274,10 @@ def __neg__(self):
         return dpnp.negative(self)
 
  # '__new__',
- # '__or__',
+
+    def __or__(self, other):
+        return dpnp.bitwise_or(self, other)
+
  # '__pos__',
 
     def __pow__(self, other):
@@ -262,7 +286,9 @@ def __pow__(self, other):
     def __radd__(self, other):
         return dpnp.add(other, self)
 
- # '__rand__',
+    def __rand__(self, other):
+        return dpnp.bitwise_and(other, self)
+
  # '__rdivmod__',
  # '__reduce__',
  # '__reduce_ex__',
@@ -271,7 +297,9 @@ def __repr__(self):
         return dpt.usm_ndarray_repr(self._array_obj, prefix="array")
 
  # '__rfloordiv__',
- # '__rlshift__',
+
+    def __rlshift__(self, other):
+        return dpnp.left_shift(other, self)
 
     def __rmatmul__(self, other):
         return dpnp.matmul(other, self)
@@ -282,13 +310,17 @@ def __rmod__(self, other):
     def __rmul__(self, other):
         return dpnp.multiply(other, self)
 
- # '__ror__',
- 
+    def __ror__(self, other):
+        return dpnp.bitwise_or(other, self)
+
     def __rpow__(self, other):
         return dpnp.power(other, self)
 
- # '__rrshift__',
- # '__rshift__',
+    def __rrshift__(self, other):
+        return dpnp.right_shift(other, self)
+
+    def __rshift__(self, other):
+        return dpnp.right_shift(self, other)
 
     def __rsub__(self, other):
         return dpnp.subtract(other, self)
@@ -296,7 +328,9 @@ def __rsub__(self, other):
     def __rtruediv__(self, other):
         return dpnp.true_divide(other, self)
 
- # '__rxor__',
+    def __rxor__(self, other):
+        return dpnp.bitwise_xor(other, self)
+
  # '__setattr__',
 
     def __setitem__(self, key, val):
@@ -334,7 +368,8 @@ def __sub__(self, other):
     def __truediv__(self, other):
         return dpnp.true_divide(self, other)
 
- # '__xor__',
+    def __xor__(self, other):
+        return dpnp.bitwise_xor(self, other)
 
     @staticmethod
     def _create_from_usm_ndarray(usm_ary : dpt.usm_ndarray):
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index b7cdef8cc615..9bf456060ddd 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -251,6 +251,7 @@ def from_dlpack(obj, /):
 def get_dpnp_descriptor(ext_obj,
                         copy_when_strides=True,
                         copy_when_nondefault_queue=True,
+                        alloc_dtype=None,
                         alloc_usm_type=None,
                         alloc_queue=None):
     """
@@ -274,7 +275,7 @@ def get_dpnp_descriptor(ext_obj,
     # If input object is a scalar, it means it was allocated on host memory.
     # We need to copy it to USM memory according to compute follows data paradigm.
     if isscalar(ext_obj):
-        ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue)
+        ext_obj = array(ext_obj, dtype=alloc_dtype, usm_type=alloc_usm_type, sycl_queue=alloc_queue)
 
     # while dpnp functions have no implementation with strides support
     # we need to create a non-strided copy
diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py
index 51a28b0464ea..36f37f4282ec 100644
--- a/dpnp/dpnp_iface_bitwise.py
+++ b/dpnp/dpnp_iface_bitwise.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -40,19 +40,20 @@
 """
 
 
-import numpy
-
-
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
 import dpnp
 
+import numpy
+import dpctl.tensor as dpt
+
+
 __all__ = [
     'bitwise_and',
+    'bitwise_not',
     'bitwise_or',
     'bitwise_xor',
     'invert',
-    'bitwise_not',
     'left_shift',
     'right_shift',
 ]
@@ -61,37 +62,34 @@
 def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=True, **kwargs):
     """Choose function to call based on input and call chosen fucntion."""
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
-
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif x1_desc and x2_desc and x1_desc.size != x2_desc.size:
-            pass
-        elif x1_desc and x2_desc and x1_desc.shape != x2_desc.shape:
-            pass
-        elif dtype is not None:
-            pass
-        elif out is not None:
-            pass
-        elif not where:
-            pass
+    if kwargs or where is not True:
+        pass  # unsupported keyword arguments or `where`: use the origin_func fallback below
+    elif dtype is not None:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        if dpnp.isscalar(x1) or dpnp.isscalar(x2):
+            usm_type, queue = get_usm_allocations([x1, x2])
+            dtype = x1.dtype if not dpnp.isscalar(x1) else x2.dtype  # scalar adopts the array operand's dtype
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
-            return dpnp_func(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
+            dtype, usm_type, queue = (None, None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_dtype=dtype, alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_dtype=dtype, alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+            else:
+                out_desc = None
+
+            return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
 
     return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
 
@@ -102,14 +100,20 @@ def bitwise_and(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.bitwise_and`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input data is supported as integer only.
+    Data types of input arrays `x1` and `x2` are limited to :obj:`dpnp.bool`, :obj:`dpnp.int32`
+    and :obj:`dpnp.int64`.
 
     See Also
     --------
@@ -136,14 +140,20 @@ def bitwise_or(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.bitwise_or`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input data is supported as integer only.
+    Data types of input arrays `x1` and `x2` are limited to :obj:`dpnp.bool`, :obj:`dpnp.int32`
+    and :obj:`dpnp.int64`.
 
     See Also
     --------
@@ -170,14 +180,20 @@ def bitwise_xor(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.bitwise_xor`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input data is supported as integer only.
+    Data types of input arrays `x1` and `x2` are limited to :obj:`dpnp.bool`, :obj:`dpnp.int32`
+    and :obj:`dpnp.int64`.
 
     See Also
     --------
@@ -198,18 +214,33 @@ def bitwise_xor(x1, x2, dtype=None, out=None, where=True, **kwargs):
     return _check_nd_call(numpy.bitwise_xor, dpnp_bitwise_xor, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
 
 
-def invert(x, **kwargs):
+def invert(x,
+           /,
+           out=None,
+           *,
+           where=True,
+           dtype=None,
+           subok=True,
+           **kwargs):
     """
     Compute bit-wise inversion, or bit-wise NOT, element-wise.
 
     For full documentation refer to :obj:`numpy.invert`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x`` is supported as :obj:`dpnp.ndarray`.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameter `x` is supported as either :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`.
+    Parameters `where`, `dtype` and `subok` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input array ``x`` is supported as integer :obj:`dpnp.ndarray` only.
+    Data type of input array `x` is limited by :obj:`dpnp.bool`, :obj:`dpnp.int32`
+    and :obj:`dpnp.int64`.
 
     See Also
     --------
@@ -220,19 +251,34 @@ def invert(x, **kwargs):
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> x = np.array([13])
-    >>> out = np.invert(x)
+    >>> import dpnp as dp
+    >>> x = dp.array([13])
+    >>> out = dp.invert(x)
     >>> out[0]
     -14
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_nondefault_queue=False)
-    if x1_desc and not kwargs:
-        return dpnp_invert(x1_desc).get_pyobj()
-
-    return call_origin(numpy.invert, x, **kwargs)
+    if kwargs:
+        pass  # any non-default option below falls through to the numpy.invert fallback
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    else:
+        x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_nondefault_queue=False)
+        if x1_desc:
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+            else:
+                out_desc = None
+            return dpnp_invert(x1_desc, out_desc).get_pyobj()  # only reached when a descriptor for x exists
+
+    return call_origin(numpy.invert, x, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 bitwise_not = invert  # bitwise_not is an alias for invert
@@ -244,12 +290,17 @@ def left_shift(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.left_shift`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
     Input data is supported as integer only.
 
@@ -276,12 +327,17 @@ def right_shift(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     For full documentation refer to :obj:`numpy.right_shift`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the element-wise results.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Sizes, shapes and data types of input arrays are supported to be equal.
-    Keyword arguments ``kwargs`` are currently unsupported.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `dtype` and `where` are supported with their default values.
+    Keyword arguments `kwargs` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
     Input data is supported as integer only.
 
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 716b2ff8a0f2..e36c44d3f989 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -300,7 +300,8 @@ def equal(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_equal(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.equal, x1, x2)
+
+    return call_origin(numpy.equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def greater(x1,
@@ -370,7 +371,8 @@ def greater(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_greater(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.greater, x1, x2)
+
+    return call_origin(numpy.greater, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def greater_equal(x1,
@@ -440,7 +442,8 @@ def greater_equal(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.greater_equal, x1, x2)
+
+    return call_origin(numpy.greater_equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False):
@@ -685,7 +688,8 @@ def less(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_less(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.less, x1, x2)
+
+    return call_origin(numpy.less, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def less_equal(x1,
@@ -755,7 +759,8 @@ def less_equal(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_less_equal(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.less_equal, x1, x2)
+
+    return call_origin(numpy.less_equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def logical_and(x1,
@@ -824,7 +829,8 @@ def logical_and(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_logical_and(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.logical_and, x1, x2)
+
+    return call_origin(numpy.logical_and, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def logical_not(x,
@@ -881,7 +887,8 @@ def logical_not(x,
         x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False)
         if x1_desc:
             return dpnp_logical_not(x1_desc).get_pyobj()
-    return call_origin(numpy.logical_not, x)
+
+    return call_origin(numpy.logical_not, x, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def logical_or(x1,
@@ -950,7 +957,8 @@ def logical_or(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_logical_or(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.logical_or, x1, x2)
+
+    return call_origin(numpy.logical_or, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def logical_xor(x1,
@@ -1019,7 +1027,8 @@ def logical_xor(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.logical_xor, x1, x2)
+
+    return call_origin(numpy.logical_xor, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
 
 
 def not_equal(x1,
@@ -1089,4 +1098,5 @@ def not_equal(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         if x1_desc and x2_desc:
             return dpnp_not_equal(x1_desc, x2_desc).get_pyobj()
-    return call_origin(numpy.not_equal, x1, x2)
+
+    return call_origin(numpy.not_equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok)
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 03d2a3527750..08de8b2ba5a8 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -44,6 +44,7 @@
 from dpnp.dpnp_utils import *
 
 import dpnp
+
 import numpy
 import dpctl.tensor as dpt
 
@@ -1413,15 +1414,14 @@ def power(x1,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
         x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
                                            alloc_usm_type=usm_type, alloc_queue=queue)
-
-        if out is not None:
-            if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
-                raise TypeError("return array must be of supported array type")
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
-        else:
-            out_desc = None
-
         if x1_desc and x2_desc:
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+            else:
+                out_desc = None
+
             return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
 
     return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
diff --git a/tests/helper.py b/tests/helper.py
index 8432443d488f..1e97615fb3de 100644
--- a/tests/helper.py
+++ b/tests/helper.py
@@ -27,7 +27,9 @@ def get_float_dtypes(no_float16=True,
     dev = dpctl.select_default_device() if device is None else device
 
     # add floating types
-    dtypes = [dpnp.float16] if not no_float16 else []
+    dtypes = []
+    if not no_float16 and dev.has_aspect_fp16:
+        dtypes.append(dpnp.float16)
 
     dtypes.append(dpnp.float32)
     if dev.has_aspect_fp64:
@@ -64,11 +66,11 @@ def get_all_dtypes(no_bool=False,
     dtypes.extend([dpnp.int32, dpnp.int64])
 
     # add floating types
-    dtypes.extend(get_float_dtypes(dev))
+    dtypes.extend(get_float_dtypes(no_float16=no_float16, device=dev))
 
     # add complex types
     if not no_complex:
-        dtypes.extend(get_complex_dtypes(dev))
+        dtypes.extend(get_complex_dtypes(device=dev))
 
     # add None value to validate a default dtype
     if not no_none:
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index bda10cfd4973..ecc5bd5e999a 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -192,12 +192,7 @@ tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)]
 
 tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1]
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_and
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_or
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_xor
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_invert
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_left_shift
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_right_shift
+
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 3e0026759334..d3864a05b7e6 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -359,12 +359,7 @@ tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)]
 tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1]
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_and
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_or
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_xor
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_invert
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_left_shift
-tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_right_shift
+
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace
diff --git a/tests/test_bitwise.py b/tests/test_bitwise.py
index 645ae4556c1c..34f7f971c86b 100644
--- a/tests/test_bitwise.py
+++ b/tests/test_bitwise.py
@@ -3,60 +3,97 @@
 import dpnp as inp
 
 import numpy
+from numpy.testing import (
+    assert_array_equal
+)
 
 
 @pytest.mark.parametrize("lhs", [[[-7, -6, -5, -4, -3, -2, -1], [0, 1, 2, 3, 4, 5, 6]], [-3, -2, -1, 0, 1, 2, 3], 0])
 @pytest.mark.parametrize("rhs", [[[0, 1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12, 13]], [0, 1, 2, 3, 4, 5, 6], 3])
-@pytest.mark.parametrize("dtype", [numpy.int32, numpy.int64])
+@pytest.mark.parametrize("dtype", [inp.bool, inp.int32, inp.int64])
 class TestBitwise:
 
     @staticmethod
     def array_or_scalar(xp, data, dtype=None):
         if numpy.isscalar(data):
+            if dtype == inp.bool:
+                return numpy.dtype(dtype).type(data)
             return data
 
         return xp.array(data, dtype=dtype)
 
     def _test_unary_int(self, name, data, dtype):
-        a = self.array_or_scalar(inp, data, dtype=dtype)
-        result = getattr(inp, name)(a)
+        dp_a = self.array_or_scalar(inp, data, dtype=dtype)
+        result = getattr(inp, name)(dp_a)
 
-        a = self.array_or_scalar(numpy, data, dtype=dtype)
-        expected = getattr(numpy, name)(a)
+        np_a = self.array_or_scalar(numpy, data, dtype=dtype)
+        expected = getattr(numpy, name)(np_a)
 
-        numpy.testing.assert_array_equal(result, expected)
+        assert_array_equal(result, expected)
+        return (dp_a, np_a)
 
     def _test_binary_int(self, name, lhs, rhs, dtype):
-        a = self.array_or_scalar(inp, lhs, dtype=dtype)
-        b = self.array_or_scalar(inp, rhs, dtype=dtype)
-        result = getattr(inp, name)(a, b)
+        if name in ('left_shift', 'right_shift') and dtype == inp.bool:
+            pytest.skip("A shift operation isn't implemented for bool type")
+        elif numpy.isscalar(lhs) and numpy.isscalar(rhs):
+            pytest.skip("Both inputs can't be scalars")
 
-        a = self.array_or_scalar(numpy, lhs, dtype=dtype)
-        b = self.array_or_scalar(numpy, rhs, dtype=dtype)
-        expected = getattr(numpy, name)(a, b)
+        dp_a = self.array_or_scalar(inp, lhs, dtype=dtype)
+        dp_b = self.array_or_scalar(inp, rhs, dtype=dtype)
+        result = getattr(inp, name)(dp_a, dp_b)
 
-        numpy.testing.assert_array_equal(result, expected)
+        np_a = self.array_or_scalar(numpy, lhs, dtype=dtype)
+        np_b = self.array_or_scalar(numpy, rhs, dtype=dtype)
+        expected = getattr(numpy, name)(np_a, np_b)
+
+        assert_array_equal(result, expected)
+        return (dp_a, dp_b, np_a, np_b)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bitwise_and(self, lhs, rhs, dtype):
-        self._test_binary_int('bitwise_and', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('bitwise_and', lhs, rhs, dtype)
+        assert_array_equal(dp_a & dp_b, np_a & np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a &= dp_b
+            np_a &= np_b
+            assert_array_equal(dp_a, np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bitwise_or(self, lhs, rhs, dtype):
-        self._test_binary_int('bitwise_or', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('bitwise_or', lhs, rhs, dtype)
+        assert_array_equal(dp_a | dp_b, np_a | np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a |= dp_b
+            np_a |= np_b
+            assert_array_equal(dp_a, np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_bitwise_xor(self, lhs, rhs, dtype):
-        self._test_binary_int('bitwise_xor', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('bitwise_xor', lhs, rhs, dtype)
+        assert_array_equal(dp_a ^ dp_b, np_a ^ np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a ^= dp_b
+            np_a ^= np_b
+            assert_array_equal(dp_a, np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_invert(self, lhs, rhs, dtype):
-        self._test_unary_int('invert', lhs, dtype)
+        dp_a, np_a = self._test_unary_int('invert', lhs, dtype)
+        assert_array_equal(~dp_a, ~np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_left_shift(self, lhs, rhs, dtype):
-        self._test_binary_int('left_shift', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('left_shift', lhs, rhs, dtype)
+        assert_array_equal(dp_a << dp_b, np_a << np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a <<= dp_b
+            np_a <<= np_b
+            assert_array_equal(dp_a, np_a)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_right_shift(self, lhs, rhs, dtype):
-        self._test_binary_int('right_shift', lhs, rhs, dtype)
+        dp_a, dp_b, np_a, np_b = self._test_binary_int('right_shift', lhs, rhs, dtype)
+        assert_array_equal(dp_a >> dp_b, np_a >> np_b)
+
+        if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape:
+            dp_a >>= dp_b
+            np_a >>= np_b
+            assert_array_equal(dp_a, np_a)
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index b0efa89968bd..817bdee66a57 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -146,6 +146,24 @@ def test_coerced_usm_types_logic_op(op, usm_type_x, usm_type_y):
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
+@pytest.mark.parametrize("op",
+                         ['bitwise_and', 'bitwise_or', 'bitwise_xor', 'left_shift', 'right_shift'],
+                         ids=['bitwise_and', 'bitwise_or', 'bitwise_xor', 'left_shift', 'right_shift'])
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_bitwise_op(op, usm_type_x, usm_type_y):
+    x = dp.arange(25, usm_type = usm_type_x)
+    y = dp.arange(25, usm_type = usm_type_y)[::-1]
+
+    z = getattr(dp, op)(x, y)
+    zx = getattr(dp, op)(x, 7)
+    zy = getattr(dp, op)(12, y)
+
+    assert x.usm_type == zx.usm_type == usm_type_x
+    assert y.usm_type == zy.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
 def test_meshgrid(usm_type_x, usm_type_y):
diff --git a/tests/third_party/cupy/binary_tests/test_elementwise.py b/tests/third_party/cupy/binary_tests/test_elementwise.py
index b2212e043f23..a01cbb082a37 100644
--- a/tests/third_party/cupy/binary_tests/test_elementwise.py
+++ b/tests/third_party/cupy/binary_tests/test_elementwise.py
@@ -1,18 +1,19 @@
 import unittest
 
+import numpy
 from tests.third_party.cupy import testing
 
 
 @testing.gpu
 class TestElementwise(unittest.TestCase):
 
-    @testing.for_int_dtypes()
+    @testing.for_dtypes((numpy.bool_, numpy.int32, numpy.int64))
     @testing.numpy_cupy_array_equal()
     def check_unary_int(self, name, xp, dtype):
         a = xp.array([-3, -2, -1, 0, 1, 2, 3], dtype=dtype)
         return getattr(xp, name)(a)
 
-    @testing.for_int_dtypes()
+    @testing.for_dtypes((numpy.int32, numpy.int64))
     @testing.numpy_cupy_array_equal()
     def check_binary_int(self, name, xp, dtype):
         a = xp.array([-3, -2, -1, 0, 1, 2, 3], dtype=dtype)

From 464e1408c02dc8b7f4f881c9624cdfa3cb4db7f9 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Mon, 13 Mar 2023 06:43:27 -0500
Subject: [PATCH 003/129] Tweaked compiler options used.

Removed the -fsycl-device-code-split=per_kernel linker option
from CFLAGS and added the SDL-mandated -fwrapv flag.
---
 dpnp/backend/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index baee709b11ee..330eb1030f3d 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -91,7 +91,6 @@ endif()
 # SYCL related compile options
 string(CONCAT COMMON_COMPILE_FLAGS
   "-fsycl "
-  "-fsycl-device-code-split=per_kernel "
   "-fno-approx-func "
   "-fno-finite-math-only "
 )
@@ -173,6 +172,7 @@ if(NOT WIN32)
     "-fno-delete-null-pointer-checks "
     "-fstack-protector-strong "
     "-fno-strict-overflow "
+    "-fwrapv "
     )
   string(APPEND COMMON_LINK_FLAGS
     "LINKER:-z,noexecstack,-z,relro,-z,now "

From d653acf32114089a7c6cec852985002e79a11961 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Mon, 13 Mar 2023 06:45:21 -0500
Subject: [PATCH 004/129] Specify dependencies of dpnp_algo on other included
 pyx files

---
 setup.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index b7bfba6c28ba..31befbc1b237 100644
--- a/setup.py
+++ b/setup.py
@@ -125,7 +125,7 @@
     '''
     This variable controls setuptools execution on windows
     to avoid automatically search and confirm workability of the compiler
-    If not set, error "Microsoft Visual C++ 14.0 or greater is required." appiars
+    If not set, error "Microsoft Visual C++ 14.0 or greater is required." appears
     '''
     os.environ["DISTUTILS_USE_SDK"] = "1"
 
@@ -145,6 +145,21 @@
 dpnp_algo = Extension(
     name="dpnp.dpnp_algo.dpnp_algo",
     sources=[os.path.join("dpnp", "dpnp_algo", "dpnp_algo.pyx")],
+    depends=[
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_linearalgebra.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_manipulation.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_counting.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_statistics.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_trigonometric.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_sorting.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_arraycreation.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_mathematical.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_searching.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_indexing.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_logic.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_bitwise.pyx"),
+        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_special.pyx"),
+    ],
     **kwargs_common)
 
 dpnp_dparray = Extension(

From 5242cff5dfeba0cecd583557ca897f8bfc0c8fb3 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Thu, 16 Mar 2023 09:21:46 -0500
Subject: [PATCH 005/129] Inplace out parameter in elementwise functions

---
 dpnp/backend/kernels/dpnp_krnl_bitwise.cpp   |  4 +-
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp  |  8 +--
 dpnp/backend/kernels/dpnp_krnl_logic.cpp     |  6 +-
 dpnp/backend/kernels/dpnp_krnl_searching.cpp |  6 +-
 dpnp/dpnp_algo/dpnp_algo.pyx                 | 37 +++++++----
 dpnp/dpnp_iface.py                           |  7 ++-
 dpnp/dpnp_iface_bitwise.py                   |  8 ++-
 dpnp/dpnp_iface_linearalgebra.py             |  2 +-
 dpnp/dpnp_iface_mathematical.py              | 64 +++++++++++---------
 dpnp/dpnp_utils/dpnp_algo_utils.pxd          |  1 +
 dpnp/dpnp_utils/dpnp_algo_utils.pyx          | 22 ++++++-
 11 files changed, 108 insertions(+), 57 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
index f3d8a4a95ccd..6264d5d7146d 100644
--- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
@@ -281,8 +281,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
                 const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
                     const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
-                    const shape_elem_type* input2_strides_data = &dev_strides_data[2];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
                                                                                                                        \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index 741a945fb099..0f691a03ab60 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -111,7 +111,7 @@
                 size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                        \
                 {                                                                                                      \
                     const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
                                                                                                                        \
                     size_t input_id = 0;                                                                               \
                     for (size_t i = 0; i < input1_ndim; ++i)                                                           \
@@ -635,7 +635,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
                 size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                        \
                 {                                                                                                      \
                     const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
                                                                                                                        \
                     size_t input_id = 0;                                                                               \
                     for (size_t i = 0; i < input1_ndim; ++i)                                                           \
@@ -995,8 +995,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                 const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
                     const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
-                    const shape_elem_type* input2_strides_data = &dev_strides_data[2];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
                                                                                                                        \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
index d1a6767c2adc..78a9a29e99e7 100644
--- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
@@ -396,7 +396,7 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
                 const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
                     const shape_elem_type *result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type *input1_strides_data = &dev_strides_data[1];                                 \
+                    const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim];                       \
                                                                                                                        \
                     size_t input1_id = 0;                                                                              \
                                                                                                                        \
@@ -635,8 +635,8 @@ static void func_map_logic_1arg_1type_helper(func_map_t& fmap)
                 const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
                     const shape_elem_type *result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type *input1_strides_data = &dev_strides_data[1];                                 \
-                    const shape_elem_type *input2_strides_data = &dev_strides_data[2];                                 \
+                    const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
                                                                                                                        \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
diff --git a/dpnp/backend/kernels/dpnp_krnl_searching.cpp b/dpnp/backend/kernels/dpnp_krnl_searching.cpp
index fef5f78d15da..471d524643f5 100644
--- a/dpnp/backend/kernels/dpnp_krnl_searching.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_searching.cpp
@@ -294,9 +294,9 @@ DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref,
             const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
             {
                 const shape_elem_type* result_strides_data = &dev_strides_data[0];
-                const shape_elem_type* condition_strides_data = &dev_strides_data[1];
-                const shape_elem_type* input1_strides_data = &dev_strides_data[2];
-                const shape_elem_type* input2_strides_data = &dev_strides_data[3];
+                const shape_elem_type* condition_strides_data = &dev_strides_data[result_ndim];
+                const shape_elem_type* input1_strides_data = &dev_strides_data[2 * result_ndim];
+                const shape_elem_type* input2_strides_data = &dev_strides_data[3 * result_ndim];
 
                 size_t condition_id = 0;
                 size_t input1_id = 0;
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 2fa9de34b998..2202ba5cfa47 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -505,8 +505,25 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
         return_type = kernel_data.return_type_no_fp64
         func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64
 
-    if out is None:
-        """ Create result array with type given by FPTR data """
+    # check 'out' parameter data
+    if out is not None:
+        result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type)
+        if out.dtype != result_type:
+            utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
+        if out.shape != result_shape:
+            utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)
+
+        utils.get_common_usm_allocation(x1_obj, out)  # check USM allocation is common
+
+    if out is None or out.data in (x1_obj.data, x2_obj.data):
+        """
+        Create result array with type given by FPTR data.
+        If 'out' array refers to the same memory as input arrays, we have to create a temporary array
+        and to copy data from the temporary into 'out' array, once the computation is completed.
+        Otherwise simultaneously access to the same memory may cause a race condition issue
+        which will result into undefined behaviour.
+        """
+        is_result_memory_allocated = True
         result = utils.create_output_descriptor(result_shape,
                                                 return_type,
                                                 None,
@@ -514,16 +531,9 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
                                                 usm_type=result_usm_type,
                                                 sycl_queue=result_sycl_queue)
     else:
-        result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type)
-        if out.dtype != result_type:
-            utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
-        if out.shape != result_shape:
-            utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)
-
+        is_result_memory_allocated = False
         result = out
 
-        utils.get_common_usm_allocation(x1_obj, result)  # check USM allocation is common
-
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
 
     result_obj = result.get_array()
@@ -554,4 +564,9 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
     with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
-    return result
+    if out is not None and is_result_memory_allocated:
+        # copy the result data back to output array
+        out.get_array()[...] = result.get_array()
+        return out
+
+    return result.get_result_desc()
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 9bf456060ddd..6a5bcf239df2 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -272,6 +272,10 @@ def get_dpnp_descriptor(ext_obj,
     if use_origin_backend():
         return False
 
+    # It's required to keep track of input object if a non-strided copy is going to be created.
+    # Thus there will be an extra descriptor allocated to refer on original input.
+    orig_desc = None
+
     # If input object is a scalar, it means it was allocated on host memory.
     # We need to copy it to USM memory according to compute follows data paradigm.
     if isscalar(ext_obj):
@@ -291,6 +295,7 @@ def get_dpnp_descriptor(ext_obj,
             ext_obj_offset = 0
 
         if ext_obj.strides != shape_offsets or ext_obj_offset != 0:
+            orig_desc = dpnp_descriptor(ext_obj)
             ext_obj = array(ext_obj)
 
     # while dpnp functions are based on DPNP_QUEUE
@@ -304,7 +309,7 @@ def get_dpnp_descriptor(ext_obj,
         if not queue_is_default:
             ext_obj = array(ext_obj, sycl_queue=default_queue)
 
-    dpnp_desc = dpnp_descriptor(ext_obj)
+    dpnp_desc = dpnp_descriptor(ext_obj, orig_desc)
     if dpnp_desc.is_valid:
         return dpnp_desc
 
diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py
index 36f37f4282ec..92f33bc6310a 100644
--- a/dpnp/dpnp_iface_bitwise.py
+++ b/dpnp/dpnp_iface_bitwise.py
@@ -62,7 +62,9 @@
 def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=True, **kwargs):
     """Choose function to call based on input and call chosen fucntion."""
 
-    if where is not True:
+    if kwargs:
+        pass
+    elif where is not True:
         pass
     elif dtype is not None:
         pass
@@ -85,7 +87,7 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T
             if out is not None:
                 if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
                     raise TypeError("return array must be of supported array type")
-                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
             else:
                 out_desc = None
 
@@ -273,7 +275,7 @@ def invert(x,
             if out is not None:
                 if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
                     raise TypeError("return array must be of supported array type")
-                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
             else:
                 out_desc = None
         return dpnp_invert(x1_desc, out_desc).get_pyobj()
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index a989f745c0a1..2a643fc8469b 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -114,7 +114,7 @@ def dot(x1, x2, out=None, **kwargs):
             if out is not None:
                 if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
                     raise TypeError("return array must be of supported array type")
-                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
             else:
                 out_desc = None
             return dpnp_dot(x1_desc, x2_desc, out=out_desc).get_pyobj()
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 08de8b2ba5a8..a001b055a280 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -95,6 +95,41 @@
 ]
 
 
+def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, dtype=None, subok=True, **kwargs):
+    """Choose function to call based on input and call chosen fucntion."""
+
+    if kwargs:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
+            else:
+                out_desc = None
+
+            return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
+
+    return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+
+
 def abs(*args, **kwargs):
     """
     Calculate the absolute value element-wise.
@@ -1397,34 +1432,7 @@ def power(x1,
 
     """
 
-    if where is not True:
-        pass
-    elif dtype is not None:
-        pass
-    elif subok is not True:
-        pass
-    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
-        # at least either x1 or x2 has to be an array
-        pass
-    else:
-        # get USM type and queue to copy scalar from the host memory into a USM allocation
-        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
-
-        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        if x1_desc and x2_desc:
-            if out is not None:
-                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
-                    raise TypeError("return array must be of supported array type")
-                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
-            else:
-                out_desc = None
-
-            return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
-
-    return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
+    return _check_nd_call(numpy.power, dpnp_power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
index db7127319bb0..2348faf94ccb 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
@@ -116,6 +116,7 @@ cdef class dpnp_descriptor:
 
     cdef public:  # TODO remove "public" as python accessible attribute
         object origin_pyobj
+        dpnp_descriptor origin_desc
         dict descriptor
         Py_ssize_t dpnp_descriptor_data_size
         cpp_bool dpnp_descriptor_is_scalar
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
index a94381788764..43d473e5c235 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
@@ -660,9 +660,10 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2):
 
 
 cdef class dpnp_descriptor:
-    def __init__(self, obj):
+    def __init__(self, obj, dpnp_descriptor orig_desc=None):
         """ Initialze variables """
         self.origin_pyobj = None
+        self.origin_desc = None
         self.descriptor = None
         self.dpnp_descriptor_data_size = 0
         self.dpnp_descriptor_is_scalar = True
@@ -681,6 +682,10 @@ cdef class dpnp_descriptor:
 
         self.origin_pyobj = obj
 
+        """ Keep track of a descriptor with original data """
+        if orig_desc is not None and orig_desc.is_valid:
+            self.origin_desc = orig_desc
+
         """ array size calculation """
         cdef Py_ssize_t shape_it = 0
         self.dpnp_descriptor_data_size = 1
@@ -740,6 +745,14 @@ cdef class dpnp_descriptor:
     def is_scalar(self):
         return self.dpnp_descriptor_is_scalar
 
+    @property
+    def is_tempored(self):
+        """
+        Non-none descriptor of original data means the current descriptor
+        holds a temporary allocated data.
+        """
+        return self.origin_desc is not None
+
     @property
     def data(self):
         if self.is_valid:
@@ -784,6 +797,13 @@ cdef class dpnp_descriptor:
             "expected either dpctl.tensor.usm_ndarray or dpnp.dpnp_array.dpnp_array, got {}"
             "".format(type(self.origin_pyobj)))
 
+    def get_result_desc(self):
+        if self.is_tempored:
+            """ Copy the result data into an original array """
+            self.origin_desc.get_array()[:] = self.get_array()
+            return self.origin_desc
+        return self
+
     cdef void * get_data(self):
         cdef Py_ssize_t item_size = 0
         cdef Py_ssize_t elem_offset = 0

From 64093b209015a603c98d96244a8fc4148229d3f0 Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 17 Mar 2023 16:55:36 +0100
Subject: [PATCH 006/129] Add support of dpnp.extract() (#1340)

* Add dpnp.extract() using dpctl.tensor.extract()
---
 dpnp/dpnp_iface_indexing.py                   | 37 ++++++++++++++++++-
 tests/__init__.py                             |  3 ++
 tests/helper.py                               |  9 +++++
 tests/skipped_tests.tbl                       |  6 ---
 tests/skipped_tests_gpu.tbl                   |  6 ---
 tests/test_indexing.py                        | 14 +++++++
 .../cupy/indexing_tests/test_indexing.py      |  4 ++
 tests/third_party/cupy/testing/helper.py      | 11 +++++-
 8 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py
index ad2eb9794f6a..9a026b6f570f 100644
--- a/dpnp/dpnp_iface_indexing.py
+++ b/dpnp/dpnp_iface_indexing.py
@@ -54,6 +54,7 @@
     "diag_indices",
     "diag_indices_from",
     "diagonal",
+    "extract",
     "fill_diagonal",
     "indices",
     "nonzero",
@@ -232,6 +233,40 @@ def diagonal(x1, offset=0, axis1=0, axis2=1):
     return call_origin(numpy.diagonal, x1, offset, axis1, axis2)
 
 
+def extract(condition, x):
+    """
+    Return the elements of an array that satisfy some condition.
+
+    For full documentation refer to :obj:`numpy.extract`.
+
+    Returns
+    -------
+    y : dpnp.ndarray
+        Rank 1 array of values from `x` where `condition` is True.
+
+    Limitations
+    -----------
+    Parameters `condition` and `x` are supported either as
+    :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
+    Parameter `x` must be the same shape as `condition`.
+    Otherwise the function will be executed sequentially on CPU.
+    """
+
+    usm_types = (dpnp_array, dpt.usm_ndarray)
+    if isinstance(condition, usm_types) and isinstance(x, usm_types):
+        # dpctl.tensor.extract is used only when the shapes match exactly;
+        # any other input combination is delegated to numpy.extract below.
+        if condition.shape == x.shape:
+            dpt_condition = (
+                condition.get_array()
+                if isinstance(condition, dpnp_array) else condition
+            )
+            dpt_array = x.get_array() if isinstance(x, dpnp_array) else x
+            return dpnp_array._create_from_usm_ndarray(dpt.extract(dpt_condition, dpt_array))
+
+    return call_origin(numpy.extract, condition, x)
+
+
 def fill_diagonal(x1, val, wrap=False):
     """
     Fill the main diagonal of the given array of any dimensionality.
@@ -296,7 +331,7 @@ def nonzero(x, /):
     -------
     y : tuple[dpnp.ndarray]
         Indices of elements that are non-zero.
-    
+
     Limitations
     -----------
     Parameters `x` is supported as either :class:`dpnp.ndarray`
diff --git a/tests/__init__.py b/tests/__init__.py
index e4085539a910..3efd595c0a04 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,4 +1,5 @@
 from tests.third_party.cupy import testing as cupy_testing
+from .helper import has_support_aspect64
 import dpnp
 import numpy
 
@@ -17,6 +18,8 @@
 
 
 def _shaped_arange(shape, xp=dpnp, dtype=dpnp.float64, order='C'):
+    if dtype is dpnp.float64 and not has_support_aspect64():
+        dtype = dpnp.float32  # the default device lacks fp64, so use float32
     res = xp.array(orig_shaped_arange(shape, xp=numpy, dtype=dtype, order=order), dtype=dtype)
     return res
 
diff --git a/tests/helper.py b/tests/helper.py
index 1e97615fb3de..de75251305c8 100644
--- a/tests/helper.py
+++ b/tests/helper.py
@@ -91,3 +91,12 @@ def is_win_platform():
     Return True if a test is runing on Windows OS, False otherwise.
     """
     return platform.startswith('win')
+
+
+def has_support_aspect64(device=None):
+    """
+    Report whether `device` exposes the fp64 aspect (double precision support).
+    When `device` is None, the default device selected by dpctl is queried.
+    """
+    target = device if device is not None else dpctl.select_default_device()
+    return target.has_aspect_fp64
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index ecc5bd5e999a..f745b217db63 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -456,12 +456,6 @@ tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compr
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_empty_1dim_no_axis
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_no_axis
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_no_bool
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_empty_1dim
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_no_bool
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_shape_mismatch
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_size_mismatch
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_size_mismatch2
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_index_range_overflow
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_1D_choicelist
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index d3864a05b7e6..2108b772a002 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -650,12 +650,6 @@ tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compr
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_empty_1dim_no_axis
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_no_axis
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_no_bool
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_empty_1dim
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_no_bool
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_shape_mismatch
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_size_mismatch
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_size_mismatch2
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_index_range_overflow
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_1D_choicelist
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 1a40777afac8..962e7f6b7b4a 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -1,4 +1,6 @@
 import pytest
+from .helper import get_all_dtypes
+
 
 import dpnp
 
@@ -53,6 +55,18 @@ def test_diagonal(array, offset):
     assert_array_equal(expected, result)
 
 
+@pytest.mark.parametrize("arr_dtype", get_all_dtypes())
+@pytest.mark.parametrize("cond_dtype", get_all_dtypes())
+def test_extract_1d(arr_dtype, cond_dtype):
+    """dpnp.extract must match numpy.extract for each parametrized dtype pair."""
+    np_arr = numpy.array([-2, -1, 0, 1, 2, 3], dtype=arr_dtype)
+    np_cond = numpy.array([1, -1, 2, 0, -2, 3], dtype=cond_dtype)
+    dp_arr, dp_cond = dpnp.array(np_arr), dpnp.array(np_cond)
+
+    expected = numpy.extract(np_cond, np_arr)
+    assert_array_equal(expected, dpnp.extract(dp_cond, dp_arr))
+
+
 @pytest.mark.parametrize("val",
                          [-1, 0, 1],
                          ids=['-1', '0', '1'])
diff --git a/tests/third_party/cupy/indexing_tests/test_indexing.py b/tests/third_party/cupy/indexing_tests/test_indexing.py
index 1cdab954bbe8..e8292831b747 100644
--- a/tests/third_party/cupy/indexing_tests/test_indexing.py
+++ b/tests/third_party/cupy/indexing_tests/test_indexing.py
@@ -166,6 +166,7 @@ def test_extract_no_bool(self, xp, dtype):
         b = xp.array([[1, 0, 1], [0, 1, 0], [1, 0, 1]], dtype=dtype)
         return xp.extract(b, a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_extract_shape_mismatch(self, xp):
         a = testing.shaped_arange((2, 3), xp)
@@ -174,6 +175,7 @@ def test_extract_shape_mismatch(self, xp):
                       [True, False]])
         return xp.extract(b, a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_extract_size_mismatch(self, xp):
         a = testing.shaped_arange((3, 3), xp)
@@ -181,6 +183,7 @@ def test_extract_size_mismatch(self, xp):
                       [False, True, False]])
         return xp.extract(b, a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_extract_size_mismatch2(self, xp):
         a = testing.shaped_arange((3, 3), xp)
@@ -188,6 +191,7 @@ def test_extract_size_mismatch2(self, xp):
                       [False, True, False, True]])
         return xp.extract(b, a)
 
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     @testing.numpy_cupy_array_equal()
     def test_extract_empty_1dim(self, xp):
         a = testing.shaped_arange((3, 3), xp)
diff --git a/tests/third_party/cupy/testing/helper.py b/tests/third_party/cupy/testing/helper.py
index 5f9864dadc59..6331309820d2 100644
--- a/tests/third_party/cupy/testing/helper.py
+++ b/tests/third_party/cupy/testing/helper.py
@@ -15,6 +15,7 @@
 # from dpnp.core import internal
 from tests.third_party.cupy.testing import array
 from tests.third_party.cupy.testing import parameterized
+from dpctl import select_default_device
 # import dpnp
 # import dpnp.scipy.sparse
 
@@ -654,9 +655,15 @@ def test_func(self, *args, **kw):
         return test_func
     return decorator
 
+def _get_supported_float_dtypes():
+    """Float dtypes for the default device; float64 only if it has fp64."""
+    if not select_default_device().has_aspect_fp64:
+        return (numpy.float32,)
+    return (numpy.float64, numpy.float32)
+
 
 _complex_dtypes = ()
-_regular_float_dtypes = (numpy.float64, numpy.float32)
+_regular_float_dtypes = _get_supported_float_dtypes()
 _float_dtypes = _regular_float_dtypes
 _signed_dtypes = ()
 _unsigned_dtypes = tuple(numpy.dtype(i).type for i in 'BHILQ')
@@ -667,7 +674,7 @@ def test_func(self, *args, **kw):
 
 
 def _make_all_dtypes(no_float16, no_bool, no_complex):
-    return (numpy.float64, numpy.float32, numpy.int64, numpy.int32)
+    return (numpy.int64, numpy.int32) + _get_supported_float_dtypes()
 #     if no_float16:
 #         dtypes = _regular_float_dtypes
 #     else:

From 4de4ef99c4a6393bad0b8c7c8261e017a4b42dc8 Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 20 Mar 2023 13:03:39 +0100
Subject: [PATCH 007/129] Reuse dpctl.tensor.place for dpnp.place (#1337)

* Reuse dpctl.tensor.place for dpnp.place

* Remove unused declaration and usefixtures
---
 dpnp/backend/include/dpnp_iface_fptr.hpp      |  1 -
 dpnp/backend/kernels/dpnp_krnl_indexing.cpp   | 15 +------
 dpnp/dpnp_algo/dpnp_algo.pxd                  |  1 -
 dpnp/dpnp_algo/dpnp_algo_indexing.pyx         | 43 -------------------
 dpnp/dpnp_iface_indexing.py                   | 20 +++++----
 tests/skipped_tests.tbl                       |  6 ---
 tests/skipped_tests_gpu.tbl                   | 15 -------
 tests/test_indexing.py                        | 12 +++---
 .../cupy/indexing_tests/test_insert.py        |  1 +
 9 files changed, 19 insertions(+), 95 deletions(-)

diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
index 197623efe454..653471fd1b50 100644
--- a/dpnp/backend/include/dpnp_iface_fptr.hpp
+++ b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -240,7 +240,6 @@ enum class DPNPFuncName : size_t
     DPNP_FN_PARTITION,                    /**< Used in numpy.partition() impl */
     DPNP_FN_PARTITION_EXT,                /**< Used in numpy.partition() impl, requires extra parameters */
     DPNP_FN_PLACE,                        /**< Used in numpy.place() impl  */
-    DPNP_FN_PLACE_EXT,                    /**< Used in numpy.place() impl, requires extra parameters */
     DPNP_FN_POWER,                        /**< Used in numpy.power() impl  */
     DPNP_FN_POWER_EXT,                    /**< Used in numpy.power() impl, requires extra parameters */
     DPNP_FN_PROD,                         /**< Used in numpy.prod() impl  */
diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
index 756899b6cc50..ac71f4fbc5f6 100644
--- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
@@ -546,20 +546,12 @@ void dpnp_place_c(void* arr_in, long* mask_in, void* vals_in, const size_t arr_s
                                                           vals_size,
                                                           dep_event_vec_ref);
     DPCTLEvent_WaitAndThrow(event_ref);
+    DPCTLEvent_Delete(event_ref);
 }
 
 template <typename _DataType>
 void (*dpnp_place_default_c)(void*, long*, void*, const size_t, const size_t) = dpnp_place_c<_DataType>;
 
-template <typename _DataType>
-DPCTLSyclEventRef (*dpnp_place_ext_c)(DPCTLSyclQueueRef,
-                                      void*,
-                                      long*,
-                                      void*,
-                                      const size_t,
-                                      const size_t,
-                                      const DPCTLEventVectorRef) = dpnp_place_c<_DataType>;
-
 template <typename _DataType, typename _IndecesType, typename _ValueType>
 DPCTLSyclEventRef dpnp_put_c(DPCTLSyclQueueRef q_ref,
                              void* array1_in,
@@ -1017,11 +1009,6 @@ void func_map_init_indexing_func(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_PLACE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_place_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_PLACE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_place_default_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_PLACE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_place_ext_c<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_PLACE_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_place_ext_c<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_PLACE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_place_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_PLACE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_place_ext_c<double>};
-
     fmap[DPNPFuncName::DPNP_FN_PUT][eft_INT][eft_INT] = {eft_INT,
                                                          (void*)dpnp_put_default_c<int32_t, int64_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_PUT][eft_LNG][eft_LNG] = {eft_LNG,
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index da1efddd3ccc..c2bb15102cf0 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -217,7 +217,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_PARTITION
         DPNP_FN_PARTITION_EXT
         DPNP_FN_PLACE
-        DPNP_FN_PLACE_EXT
         DPNP_FN_POWER
         DPNP_FN_POWER_EXT
         DPNP_FN_PROD
diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx b/dpnp/dpnp_algo/dpnp_algo_indexing.pyx
index 4e07c03f24a5..1759d6f782b1 100644
--- a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_indexing.pyx
@@ -40,7 +40,6 @@ __all__ += [
     "dpnp_diagonal",
     "dpnp_fill_diagonal",
     "dpnp_indices",
-    "dpnp_place",
     "dpnp_put",
     "dpnp_put_along_axis",
     "dpnp_putmask",
@@ -78,13 +77,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_1out_func_ptr_t_)(c_dpct
 ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
                                                                     void *, void * , shape_elem_type * , const size_t,
                                                                     const c_dpctl.DPCTLEventVectorRef)
-ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_3in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
-                                                                    void * ,
-                                                                    void * ,
-                                                                    void * ,
-                                                                    const size_t,
-                                                                    const size_t,
-                                                                    const c_dpctl.DPCTLEventVectorRef)
 ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_3in_with_axis_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
                                                                               void * ,
                                                                               void * ,
@@ -307,41 +299,6 @@ cpdef object dpnp_indices(dimensions):
     return dpnp_result
 
 
-cpdef dpnp_place(dpnp_descriptor arr, object mask, dpnp_descriptor vals):
-    result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(arr, vals)
-
-    cdef utils.dpnp_descriptor mask_ = utils_py.create_output_descriptor_py((mask.size,),
-                                                                            dpnp.int64,
-                                                                            None,
-                                                                            device=result_sycl_device,
-                                                                            usm_type=result_usm_type,
-                                                                            sycl_queue=result_sycl_queue)
-    for i in range(mask.size):
-        if mask.item(i):
-            mask_.get_pyobj()[i] = 1
-        else:
-            mask_.get_pyobj()[i] = 0
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype)
-
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PLACE_EXT, param1_type, param1_type)
-
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
-
-    cdef custom_indexing_3in_func_ptr_t func = <custom_indexing_3in_func_ptr_t > kernel_data.ptr
-
-    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
-                                                    arr.get_data(),
-                                                    mask_.get_data(),
-                                                    vals.get_data(),
-                                                    arr.size,
-                                                    vals.size,
-                                                    NULL)  # dep_events_ref
-
-    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-    c_dpctl.DPCTLEvent_Delete(event_ref)
-
-
 cpdef dpnp_put(dpnp_descriptor x1, object ind, v):
     ind_is_list = isinstance(ind, list)
 
diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py
index 9a026b6f570f..aa9989e5f7dc 100644
--- a/dpnp/dpnp_iface_indexing.py
+++ b/dpnp/dpnp_iface_indexing.py
@@ -377,24 +377,26 @@ def nonzero(x, /):
     return call_origin(numpy.nonzero, x)
 
 
-def place(x1, mask, vals):
+def place(x, mask, vals, /):
     """
     Change elements of an array based on conditional and input values.
     For full documentation refer to :obj:`numpy.place`.
 
     Limitations
     -----------
-    Input arrays ``arr`` and ``mask``  are supported as :obj:`dpnp.ndarray`.
-    Parameter ``vals`` is supported as 1-D sequence.
+    Parameters `x`, `mask` and `vals` are supported either as
+    :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
+    Otherwise the function will be executed sequentially on CPU.
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    mask_desc = dpnp.get_dpnp_descriptor(mask, copy_when_nondefault_queue=False)
-    vals_desc = dpnp.get_dpnp_descriptor(vals, copy_when_nondefault_queue=False)
-    if x1_desc and mask_desc and vals_desc:
-        return dpnp_place(x1_desc, mask, vals_desc)
+    check_input_type = lambda x: isinstance(x, (dpnp_array, dpt.usm_ndarray))
+    if check_input_type(x) and check_input_type(mask) and check_input_type(vals):
+        dpt_array = x.get_array() if isinstance(x, dpnp_array) else x
+        dpt_mask = mask.get_array() if isinstance(mask, dpnp_array) else mask
+        dpt_vals = vals.get_array() if isinstance(vals, dpnp_array) else vals
+        return dpt.place(dpt_array, dpt_mask, dpt_vals)
 
-    return call_origin(numpy.place, x1, mask, vals, dpnp_inplace=True)
+    return call_origin(numpy.place, x, mask, vals, dpnp_inplace=True)
 
 
 def put(x1, ind, v, mode='raise'):
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index f745b217db63..2e7a5a6d6f3e 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -508,12 +508,6 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_5_{
 tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_6_{shape=(3, 3), val=(2, 2), wrap=True}::test_columnar_slice
 tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_7_{shape=(3, 3), val=(2, 2), wrap=False}::test_columnar_slice
 tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_9_{shape=(2, 2, 2), val=1, wrap=False}::test_1darray
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_0_{shape=(7,)}::test_place_empty_value_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_0_{shape=(7,)}::test_place_shape_unmatch_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_1_{shape=(2, 3)}::test_place_empty_value_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_1_{shape=(2, 3)}::test_place_shape_unmatch_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_2_{shape=(4, 3, 2)}::test_place_empty_value_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_2_{shape=(4, 3, 2)}::test_place_shape_unmatch_error
 tests/third_party/cupy/indexing_tests/test_insert.py::TestPutmaskDifferentDtypes::test_putmask_differnt_dtypes_raises
 tests/third_party/cupy/indexing_tests/test_insert.py::TestPutmask::test_putmask_non_equal_shape_raises
 tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiter::test_next
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 2108b772a002..fee79df860fb 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -53,15 +53,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]
 
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_no_axis
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_3_{n_vals=1, shape=(7,)}::test_place
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_4_{n_vals=1, shape=(2, 3)}::test_place
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_5_{n_vals=1, shape=(4, 3, 2)}::test_place
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_6_{n_vals=3, shape=(7,)}::test_place
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_7_{n_vals=3, shape=(2, 3)}::test_place
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_8_{n_vals=3, shape=(4, 3, 2)}::test_place
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_9_{n_vals=15, shape=(7,)}::test_place
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_10_{n_vals=15, shape=(2, 3)}::test_place
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_11_{n_vals=15, shape=(4, 3, 2)}::test_place
 tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndices_param_0_{n=2, ndim=2}::test_diag_indices
 tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndices_param_1_{n=2, ndim=3}::test_diag_indices
 tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndices_param_2_{n=2, ndim=1}::test_diag_indices
@@ -702,12 +693,6 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_5_{
 tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_6_{shape=(3, 3), val=(2, 2), wrap=True}::test_columnar_slice
 tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_7_{shape=(3, 3), val=(2, 2), wrap=False}::test_columnar_slice
 tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_9_{shape=(2, 2, 2), val=1, wrap=False}::test_1darray
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_0_{shape=(7,)}::test_place_empty_value_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_0_{shape=(7,)}::test_place_shape_unmatch_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_1_{shape=(2, 3)}::test_place_empty_value_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_1_{shape=(2, 3)}::test_place_shape_unmatch_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_2_{shape=(4, 3, 2)}::test_place_empty_value_error
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_2_{shape=(4, 3, 2)}::test_place_shape_unmatch_error
 tests/third_party/cupy/indexing_tests/test_insert.py::TestPutmaskDifferentDtypes::test_putmask_differnt_dtypes_raises
 tests/third_party/cupy/indexing_tests/test_insert.py::TestPutmask::test_putmask_non_equal_shape_raises
 tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiter::test_next
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 962e7f6b7b4a..41128fd70e2d 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -127,7 +127,6 @@ def test_nonzero(array):
     assert_array_equal(expected, result)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("vals",
                          [[100, 200],
                           (100, 200)],
@@ -152,12 +151,12 @@ def test_place1(arr, mask, vals):
     ia = dpnp.array(a)
     m = numpy.array(mask)
     im = dpnp.array(m)
+    iv = dpnp.array(vals)
     numpy.place(a, m, vals)
-    dpnp.place(ia, im, vals)
+    dpnp.place(ia, im, iv)
     assert_array_equal(a, ia)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("vals",
                          [[100, 200],
                           [100, 200, 300, 400, 500, 600],
@@ -176,12 +175,12 @@ def test_place2(arr, mask, vals):
     ia = dpnp.array(a)
     m = numpy.array(mask)
     im = dpnp.array(m)
+    iv = dpnp.array(vals)
     numpy.place(a, m, vals)
-    dpnp.place(ia, im, vals)
+    dpnp.place(ia, im, iv)
     assert_array_equal(a, ia)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("vals",
                          [[100, 200],
                           [100, 200, 300, 400, 500, 600],
@@ -201,8 +200,9 @@ def test_place3(arr, mask, vals):
     ia = dpnp.array(a)
     m = numpy.array(mask)
     im = dpnp.array(m)
+    iv = dpnp.array(vals)
     numpy.place(a, m, vals)
-    dpnp.place(ia, im, vals)
+    dpnp.place(ia, im, iv)
     assert_array_equal(a, ia)
 
 
diff --git a/tests/third_party/cupy/indexing_tests/test_insert.py b/tests/third_party/cupy/indexing_tests/test_insert.py
index fdcc5357e19e..809c44b66dae 100644
--- a/tests/third_party/cupy/indexing_tests/test_insert.py
+++ b/tests/third_party/cupy/indexing_tests/test_insert.py
@@ -39,6 +39,7 @@ class TestPlaceRaises(unittest.TestCase):
     # https://github.com/numpy/numpy/pull/5821
     @testing.with_requires('numpy>=1.10')
     @testing.for_all_dtypes()
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_place_empty_value_error(self, dtype):
         for xp in (numpy, cupy):
             a = testing.shaped_arange(self.shape, xp, dtype)

From 098e6988887c29731f0c4ee849202228e478a882 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 20 Mar 2023 07:44:01 -0500
Subject: [PATCH 008/129] Add tests and use dpctl overlap function

---
 dpnp/dpnp_algo/dpnp_algo.pyx        | 14 ++--
 dpnp/dpnp_utils/dpnp_algo_utils.pxd |  1 +
 dpnp/dpnp_utils/dpnp_algo_utils.pyx | 40 ++++++++++--
 tests/test_mathematical.py          | 45 +++++++++++--
 tests/test_strides.py               | 99 +++++++++++++++++++++++++++++
 tests/test_umath.py                 | 46 ++++++++------
 6 files changed, 206 insertions(+), 39 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 2202ba5cfa47..923454142f43 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -507,19 +507,17 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
 
     # check 'out' parameter data
     if out is not None:
-        result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type)
-        if out.dtype != result_type:
-            utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
         if out.shape != result_shape:
             utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)
 
         utils.get_common_usm_allocation(x1_obj, out)  # check USM allocation is common
 
-    if out is None or out.data in (x1_obj.data, x2_obj.data):
+    if out is None or out.is_array_overlapped(x1_obj) or out.is_array_overlapped(x2_obj) or not out.match_ctype(return_type):
         """
         Create result array with type given by FPTR data.
-        If 'out' array refers to the same memory as input arrays, we have to create a temporary array
-        and to copy data from the temporary into 'out' array, once the computation is completed.
+        If 'out' array has another dtype than expected or overlaps a memory from any input array,
+        we have to create a temporary array and to copy data from the temporary into 'out' array,
+        once the computation is completed.
         Otherwise simultaneously access to the same memory may cause a race condition issue
         which will result into undefined behaviour.
         """
@@ -565,8 +563,6 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     if out is not None and is_result_memory_allocated:
-        # copy the result data back to output array
-        out.get_array()[...] = result.get_array()
-        return out
+        return out.get_result_desc(result)
 
     return result.get_result_desc()
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
index 2348faf94ccb..0015e8d12c02 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd
@@ -122,6 +122,7 @@ cdef class dpnp_descriptor:
         cpp_bool dpnp_descriptor_is_scalar
 
     cdef void * get_data(self)
+    cdef cpp_bool match_ctype(self, DPNPFuncType ctype)
 
 
 cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
index 43d473e5c235..7a3fb316261d 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
@@ -35,6 +35,8 @@ import numpy
 
 import dpctl
 import dpctl.utils as dpu
+import dpctl.tensor._copy_utils as dpt_cu
+import dpctl.tensor._tensor_impl as dpt_ti
 
 import dpnp.config as config
 import dpnp.dpnp_container as dpnp_container
@@ -746,7 +748,7 @@ cdef class dpnp_descriptor:
         return self.dpnp_descriptor_is_scalar
 
     @property
-    def is_tempored(self):
+    def is_temporary(self):
         """
         Non-none descriptor of original data means the current descriptor
         holds a temporary allocated data.
@@ -784,6 +786,15 @@ cdef class dpnp_descriptor:
 
         return interface_dict
 
+    def _copy_array_from(self, other_desc):
+        """
+        Fill array data with usm_ndarray of the same shape from other DPNP descriptor
+        """
+        if not isinstance(other_desc, dpnp_descriptor):
+            raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc)))
+
+        dpt_cu._copy_same_shape(self.get_array(), other_desc.get_array())
+
     def get_pyobj(self):
         return self.origin_pyobj
 
@@ -797,13 +808,29 @@ cdef class dpnp_descriptor:
             "expected either dpctl.tensor.usm_ndarray or dpnp.dpnp_array.dpnp_array, got {}"
             "".format(type(self.origin_pyobj)))
 
-    def get_result_desc(self):
-        if self.is_tempored:
-            """ Copy the result data into an original array """
-            self.origin_desc.get_array()[:] = self.get_array()
+    def get_result_desc(self, result_desc=None):
+        """
+        Copy the result data into an original array
+        """
+        if self.is_temporary:
+            # Original descriptor is not None, so copy the array data into it and return
+            from_desc = self if result_desc is None else result_desc
+            self.origin_desc._copy_array_from(from_desc)
             return self.origin_desc
+        elif result_desc is not None:
+            # A temporary result descriptor was allocated, needs to copy data back into 'out' descriptor
+            self._copy_array_from(result_desc)
         return self
 
+    def is_array_overlapped(self, other_desc):
+        """
+        Check if usm_ndarray overlaps an array from other DPNP descriptor
+        """
+        if not isinstance(other_desc, dpnp_descriptor):
+            raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc)))
+
+        return dpt_ti._array_overlap(self.get_array(), other_desc.get_array())
+
     cdef void * get_data(self):
         cdef Py_ssize_t item_size = 0
         cdef Py_ssize_t elem_offset = 0
@@ -818,6 +845,9 @@ cdef class dpnp_descriptor:
 
         return < void * > val
 
+    cdef cpp_bool match_ctype(self, DPNPFuncType ctype):
+        return self.dtype == dpnp_DPNPFuncType_to_dtype(< size_t > ctype)
+
     def __bool__(self):
         return self.is_valid
 
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index e58e129c03b3..4b3b5d07f941 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -635,7 +635,6 @@ def test_invalid_shape(self, shape):
 
 
 class TestPower:
-
     def test_power(self):
         array1_data = numpy.arange(10)
         array2_data = numpy.arange(5, 15)
@@ -655,13 +654,45 @@ def test_power(self):
         assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_none=True))
-    def test_invalid_dtype(self, dtype):
-        dp_array1 = dpnp.arange(10, dtype=dpnp.complex64)
-        dp_array2 = dpnp.arange(5, 15, dtype=dpnp.complex64)
-        dp_out = dpnp.empty(10, dtype=dtype)
+    def test_out_dtypes(self, dtype):
+        size = 2 if dtype == dpnp.bool else 5
 
-        with pytest.raises(ValueError):
-            dpnp.power(dp_array1, dp_array2, out=dp_out)
+        np_array1 = numpy.arange(size, 2 * size, dtype=dtype)
+        np_array2 = numpy.arange(size, dtype=dtype)
+        np_out = numpy.empty(size, dtype=numpy.complex64)
+        expected = numpy.power(np_array1, np_array2, out=np_out)
+
+        dp_array1 = dpnp.arange(size, 2*size, dtype=dtype)
+        dp_array2 = dpnp.arange(size, dtype=dtype)
+        dp_out = dpnp.empty(size, dtype=dpnp.complex64)
+        result = dpnp.power(dp_array1, dp_array2, out=dp_out)
+
+        assert_array_equal(expected, result)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+    def test_out_overlap(self, dtype):
+        size = 5
+
+        np_a = numpy.arange(2 * size, dtype=dtype)
+        expected = numpy.power(np_a[size::], np_a[::2], out=np_a[:size:])
+
+        dp_a = dpnp.arange(2 * size, dtype=dtype)
+        result = dpnp.power(dp_a[size::], dp_a[::2], out=dp_a[:size:])
+
+        assert_allclose(expected, result)
+        assert_allclose(dp_a, np_a)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+    def test_inplace_strided_out(self, dtype):
+        size = 5
+
+        np_a = numpy.arange(2 * size, dtype=dtype)
+        np_a[::3] **= 3
+
+        dp_a = dpnp.arange(2 * size, dtype=dtype)
+        dp_a[::3] **= 3
+
+        assert_allclose(dp_a, np_a)
 
     @pytest.mark.parametrize("shape",
                              [(0,), (15, ), (2, 2)],
diff --git a/tests/test_strides.py b/tests/test_strides.py
index e56e9befeee4..10bd575bf6a9 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -214,3 +214,102 @@ def test_strides_true_devide(dtype, shape):
     expected = numpy.fmod(a, b)
 
     assert_allclose(result, expected)
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strided_out_2args(func_name, dtype):
+    np_out = numpy.ones((5, 3, 2))[::3]
+    np_a = numpy.arange(numpy.prod(np_out.shape), dtype=dtype).reshape(np_out.shape)
+    np_b = numpy.full(np_out.shape, fill_value=0.7, dtype=dtype)
+
+    dp_out = dpnp.ones((5, 3, 2))[::3]
+    dp_a = dpnp.array(np_a)
+    dp_b = dpnp.array(np_b)
+
+    np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out)
+    dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out)
+
+    assert_allclose(dp_res.asnumpy(), np_res)
+    assert_allclose(dp_out.asnumpy(), np_out)
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strided_in_out_2args(func_name, dtype):
+    sh = (3, 4, 2)
+    prod = numpy.prod(sh)
+
+    np_out = numpy.ones(sh, dtype=dtype)[::2]
+    np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2]
+    np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2].T
+
+    dp_out = dpnp.ones(sh, dtype=dtype)[::2]
+    dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2]
+    dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2].T
+
+    np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out)
+    dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out)
+
+    assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06)
+    assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06)
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strided_in_out_2args_diff_out_dtype(func_name, dtype):
+    sh = (3, 3, 2)
+    prod = numpy.prod(sh)
+
+    np_out = numpy.ones(sh, dtype=numpy.complex64)[::2]
+    np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2].T
+    np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2]
+
+    dp_out = dpnp.ones(sh, dtype=dpnp.complex64)[::2]
+    dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2].T
+    dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2]
+
+    np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out)
+    dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out)
+
+    assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06)
+    assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06)
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+def test_strided_in_2args_overlap(func_name, dtype):
+    size = 5
+
+    np_a = numpy.arange(2 * size, dtype=dtype)
+    dp_a = dpnp.arange(2 * size, dtype=dtype)
+
+    np_res = _getattr(numpy, func_name)(np_a[size::], np_a[::2], out=np_a[:size:])
+    dp_res = _getattr(dpnp, func_name)(dp_a[size::], dp_a[::2], out=dp_a[:size:])
+
+    assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06)
+    assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06)
+
+
+@pytest.mark.parametrize("func_name",
+                         ["power"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+def test_strided_in_out_2args_overlap(func_name, dtype):
+    sh = (4, 3, 2)
+    prod = numpy.prod(sh)
+
+    np_a = numpy.arange(prod, dtype=dtype).reshape(sh)
+    np_b = numpy.full(np_a[::2].shape, fill_value=0.7, dtype=dtype)
+
+    dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)
+    dp_b = dpnp.full(dp_a[::2].shape, fill_value=0.7, dtype=dtype)
+
+    np_res = _getattr(numpy, func_name)(np_a[::2], np_b, out=np_a[1::2])
+    dp_res = _getattr(dpnp, func_name)(dp_a[::2], dp_b, out=dp_a[1::2])
+
+    assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06)
+    assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06)
diff --git a/tests/test_umath.py b/tests/test_umath.py
index 6122b253ca37..3a1f4467dcea 100644
--- a/tests/test_umath.py
+++ b/tests/test_umath.py
@@ -1,6 +1,14 @@
 import pytest
+from .helper import (
+    get_all_dtypes
+)
 
 import numpy
+from numpy.testing import (
+    assert_allclose,
+    assert_array_equal
+)
+
 import dpnp
 
 # full list of umaths
@@ -71,7 +79,7 @@ def test_umaths(test_cases):
     # DPNP
     result = getattr(dpnp, umath)(*iargs)
 
-    numpy.testing.assert_allclose(result, expected, rtol=1e-6)
+    assert_allclose(result, expected, rtol=1e-6)
 
 
 class TestSin:
@@ -89,7 +97,7 @@ def test_sin_ordinary(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.sin(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -129,7 +137,7 @@ def test_cos(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.cos(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -169,7 +177,7 @@ def test_log(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.log(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -209,7 +217,7 @@ def test_exp(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.exp(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -249,7 +257,7 @@ def test_arcsin(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.arcsin(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -289,7 +297,7 @@ def test_arctan(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.arctan(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -329,7 +337,7 @@ def test_tan(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.tan(np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
     @pytest.mark.parametrize("dtype",
                              [numpy.float32, numpy.int64, numpy.int32],
@@ -355,7 +363,6 @@ def test_invalid_shape(self, shape):
 
 
 class TestArctan2:
-
     def test_arctan2(self):
         array_data = numpy.arange(10)
         out = numpy.empty(10, dtype=numpy.float64)
@@ -369,18 +376,21 @@ def test_arctan2(self):
         np_array = numpy.array(array_data, dtype=numpy.float64)
         expected = numpy.arctan2(np_array, np_array, out=out)
 
-        numpy.testing.assert_array_equal(expected, result)
+        assert_array_equal(expected, result)
 
-    @pytest.mark.parametrize("dtype",
-                             [numpy.float32, numpy.int64, numpy.int32],
-                             ids=['numpy.float32', 'numpy.int64', 'numpy.int32'])
-    def test_invalid_dtype(self, dtype):
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
+    def test_out_dtypes(self, dtype):
+        size = 2 if dtype == dpnp.bool else 10
 
-        dp_array = dpnp.arange(10, dtype=dpnp.float64)
-        dp_out = dpnp.empty(10, dtype=dtype)
+        np_array = numpy.arange(size, dtype=dtype)
+        np_out = numpy.empty(size, dtype=numpy.complex64)
+        expected = numpy.arctan2(np_array, np_array, out=np_out)
 
-        with pytest.raises(ValueError):
-            dpnp.arctan2(dp_array, dp_array, out=dp_out)
+        dp_array = dpnp.arange(size, dtype=dtype)
+        dp_out = dpnp.empty(size, dtype=dpnp.complex64)
+        result = dpnp.arctan2(dp_array, dp_array, out=dp_out)
+
+        assert_allclose(expected, result)
 
     @pytest.mark.parametrize("shape",
                              [(0,), (15, ), (2, 2)],

From 921893e0bd6b4c27367774016485bda246168073 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 11:07:34 -0500
Subject: [PATCH 009/129] Transitioned dpnp build system to use scikit-build

To build dpnp, one does

```bash
CC=icx CXX=icpx python setup.py develop -G Ninja -- -DDPCTL_MODULE_PATH=$(python -m dpctl --cmakedir)
```

or on Windows

```cmd
python setup.py develop -G Ninja -- -DDPCTL_MODULE_PATH=<path> -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx
```

where the `<path>` is the output of `python -m dpctl --cmakedir`.
---
 CMakeLists.txt                               | 123 ++++++
 dpnp/CMakeLists.txt                          |  44 ++
 dpnp/backend/CMakeLists.txt                  | 337 ++-------------
 dpnp/backend/cmake/Modules/FindDPL.cmake     |  53 ---
 dpnp/backend/cmake/Modules/FindMathLib.cmake |  75 ----
 dpnp/backend/saved_old_CMakeLists.txt        | 354 +++++++++++++++
 dpnp/dpnp_algo/CMakeLists.txt                |  25 ++
 dpnp/dpnp_utils/CMakeLists.txt               |   7 +
 dpnp/fft/CMakeLists.txt                      |   7 +
 dpnp/linalg/CMakeLists.txt                   |   7 +
 dpnp/random/CMakeLists.txt                   |   7 +
 setup.py                                     | 250 +----------
 utils/command_build_clib.py                  | 427 -------------------
 utils/command_build_cmake_clib.py            | 134 ------
 utils/command_clean.py                       |  84 ----
 utils/command_style.py                       | 155 -------
 utils/dpnp_build_utils.py                    | 397 -----------------
 utils/dpnp_coverage.py                       | 154 -------
 18 files changed, 633 insertions(+), 2007 deletions(-)
 create mode 100644 CMakeLists.txt
 create mode 100644 dpnp/CMakeLists.txt
 delete mode 100644 dpnp/backend/cmake/Modules/FindDPL.cmake
 delete mode 100644 dpnp/backend/cmake/Modules/FindMathLib.cmake
 create mode 100644 dpnp/backend/saved_old_CMakeLists.txt
 create mode 100644 dpnp/dpnp_algo/CMakeLists.txt
 create mode 100644 dpnp/dpnp_utils/CMakeLists.txt
 create mode 100644 dpnp/fft/CMakeLists.txt
 create mode 100644 dpnp/linalg/CMakeLists.txt
 create mode 100644 dpnp/random/CMakeLists.txt
 delete mode 100644 utils/command_build_clib.py
 delete mode 100644 utils/command_build_cmake_clib.py
 delete mode 100644 utils/command_clean.py
 delete mode 100644 utils/command_style.py
 delete mode 100644 utils/dpnp_build_utils.py
 delete mode 100644 utils/dpnp_coverage.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000000..5ff196915a8a
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,123 @@
+cmake_minimum_required(VERSION 3.21...3.26 FATAL_ERROR)
+
+if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.24")
+  cmake_policy(SET CMP0135 NEW)
+endif()
+
+project(dpnp
+  DESCRIPTION "NumPy-like API accelerated by SYCL."
+)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+
+if (NOT DEFINED DPCTL_MODULE_PATH)
+  if (DEFINED ENV{DPCTL_MODULE_PATH})
+    set(DPCTL_MODULE_PATH $ENV{DPCTL_MODULE_PATH})
+  else ()
+    message(FATAL_ERROR "Specify DPCTL_MODULE_PATH, either via cmake or as an environment variable")
+  endif()
+endif()
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${DPCTL_MODULE_PATH})
+
+
+find_package(IntelDPCPP REQUIRED)
+find_package(TBB REQUIRED)
+
+set(MKL_ARCH "intel64")
+set(MKL_LINK "dynamic")
+set(MKL_INTERFACE_FULL "intel_ilp64")
+set(MKL_THREADING "tbb_thread")
+find_package(MKL REQUIRED)
+
+find_package(oneDPL REQUIRED)
+
+include(GNUInstallDirs)
+
+# Fetch pybind11
+include(FetchContent)
+FetchContent_Declare(
+  pybind11
+  URL https://github.com/pybind/pybind11/archive/refs/tags/v2.10.2.tar.gz
+  URL_HASH SHA256=93bd1e625e43e03028a3ea7389bba5d3f9f2596abc074b068e70f4ef9b1314ae
+)
+FetchContent_MakeAvailable(pybind11)
+
+find_package(PythonExtensions REQUIRED)
+find_package(NumPy REQUIRED)
+
+set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"")
+find_package(Cython REQUIRED)
+find_package(Dpctl REQUIRED)
+
+if(WIN32)
+    string(CONCAT WARNING_FLAGS
+        "-Wall "
+        "-Wextra "
+        "-Winit-self "
+        "-Wunused-function "
+        "-Wuninitialized "
+        "-Wmissing-declarations "
+        "-Wno-unused-parameter "
+    )
+    string(CONCAT SDL_FLAGS
+        "/GS "
+        "/DynamicBase "
+    )
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS}")
+    set(CMAKE_C_FLAGS_DEBUG
+        "${CMAKE_C_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG"
+    )
+    set(CMAKE_CXX_FLAGS_DEBUG
+        "${CMAKE_CXX_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG"
+    )
+    set(DPNP_LDFLAGS "/NXCompat;/DynamicBase")
+elseif(UNIX)
+    string(CONCAT WARNING_FLAGS
+        "-Wall "
+        "-Wextra "
+        "-Winit-self "
+        "-Wunused-function "
+        "-Wuninitialized "
+        "-Wmissing-declarations "
+        "-fdiagnostics-color=auto "
+    )
+    string(CONCAT SDL_FLAGS
+        "-fstack-protector "
+        "-fstack-protector-all "
+        "-fpic "
+        "-fPIC "
+        "-D_FORTIFY_SOURCE=2 "
+        "-Wformat "
+        "-Wformat-security "
+#        "-fno-strict-overflow "   # implied by -fwrapv
+        "-fno-delete-null-pointer-checks "
+        "-fwrapv "
+    )
+    string(CONCAT CFLAGS
+        "${WARNING_FLAGS}"
+        "${SDL_FLAGS}"
+    )
+    string(CONCAT CXXFLAGS
+        "${WARNING_FLAGS}"
+        "${SDL_FLAGS}"
+        "-fsycl "
+    )
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 ${CFLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 ${CXXFLAGS}")
+    set(CMAKE_C_FLAGS_DEBUG
+        "${CMAKE_C_FLAGS_DEBUG} ${CFLAGS} -O0 -g1 -DDEBUG"
+    )
+    set(CMAKE_CXX_FLAGS_DEBUG
+        "${CMAKE_CXX_FLAGS_DEBUG} ${CXXFLAGS} -O0 -g1 -DDEBUG"
+    )
+    set(DPNP_LDFLAGS "-z,noexecstack,-z,relro,-z,now")
+else()
+    message(FATAL_ERROR "Unsupported system.")
+endif()
+
+add_subdirectory(dpnp)
diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
new file mode 100644
index 000000000000..7d3936da2d4b
--- /dev/null
+++ b/dpnp/CMakeLists.txt
@@ -0,0 +1,44 @@
+
+function(build_dpnp_cython_ext _trgt _src _dest)
+  add_cython_target(${_trgt} ${_src} CXX OUTPUT_VAR _generated_src)
+  message(STATUS "Using ${_trgt}")
+  add_library(${_trgt} MODULE ${_generated_src})
+  set(_trgt_deps "${_trgt}_deps")
+  add_custom_target(${_trgt_deps} DEPENDS ${_src})
+  add_dependencies(${_trgt} ${_trgt_deps})
+  if (DPNP_GENERATE_COVERAGE)
+    target_compile_definitions(${_trgt} PRIVATE CYTHON_TRACE=1 CYTHON_TRACE_NOGIL=1)
+    target_compile_options(${_trgt} PRIVATE -fno-sycl-use-footer)
+  endif()
+  # NumPy
+  target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR})
+  # Dpctl
+  target_include_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR})
+  target_link_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR}/..)
+  target_link_libraries(${_trgt} DPCTLSyclInterface)
+  
+  set(_linker_options "LINKER:${DPNP_LDFLAGS}")
+  target_link_options(${_trgt} PRIVATE ${_linker_options})
+  python_extension_module(${_trgt})
+  
+  install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest})
+endfunction()
+
+function(build_dpnp_cython_ext_with_backend _trgt _src _dest)
+  build_dpnp_cython_ext(${_trgt} ${_src} ${_dest})
+  target_link_libraries(${_trgt} dpnp_backend_library)
+  if (UNIX)
+    set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH "$ORIGIN/..")
+  endif()
+endfunction()
+
+
+build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.pyx dpnp)
+add_subdirectory(backend)
+
+add_subdirectory(dpnp_algo)
+add_subdirectory(dpnp_utils)
+add_subdirectory(fft)
+add_subdirectory(linalg)
+add_subdirectory(random)
+
diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 330eb1030f3d..744768aa158d 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -23,187 +23,6 @@
 # THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
-# cmake-format -i CMakeLists.txt --line-width=120
-
-cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
-
-# set(DPNP_VERSION 0.11.1)
-# set(DPNP_API_VERSION 0.11)
-
-# set directory where the custom finders live
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules")
-
-# -----------------------------------------------------------------------------------------------
-# Project build options...
-# -----------------------------------------------------------------------------------------------
-if(DEFINED ENV{ONEAPI_ROOT})
-  set(DPNP_ONEAPI_ROOT "$ENV{ONEAPI_ROOT}" CACHE PATH "Folder contains oneapi tool set")
-endif()
-
-option(DPNP_STATIC_LIB_ENABLE "Enable build DPNP static library" FALSE)
-option(DPNP_DEBUG_ENABLE "Enable output for DPNP_DEBUG statements" FALSE)
-option(DPNP_INSTALL_STRUCTURED "if FALSE, install package files into same directory" TRUE)
-option(DPNP_SYCL_QUEUE_MGR_ENABLE "Use external manager for SYCL queue" FALSE)
-option(DPNP_BACKEND_TESTS "Enable DPNP tests" FALSE)
-
-if(DEFINED ENV{DPNP_DEBUG})
-  set(DPNP_DEBUG_ENABLE $ENV{DPNP_DEBUG})
-endif()
-
-message(STATUS "CMAKE_VERSION:                   ${CMAKE_VERSION}")
-message(STATUS "CMAKE_GENERATOR:                 ${CMAKE_GENERATOR}")
-message(STATUS "CMAKE_HOST_SYSTEM_NAME:          ${CMAKE_HOST_SYSTEM_NAME}")
-message(STATUS "========== User controlled variables list ==========")
-message(STATUS "DPNP_ONEAPI_ROOT:                ${DPNP_ONEAPI_ROOT}")
-message(STATUS "DPNP_STATIC_LIB_ENABLE:          ${DPNP_STATIC_LIB_ENABLE}")
-message(STATUS "DPNP_DEBUG_ENABLE:               ${DPNP_DEBUG_ENABLE}")
-message(STATUS "DPNP_BACKEND_TESTS:              ${DPNP_BACKEND_TESTS}")
-message(STATUS "DPNP_INSTALL_STRUCTURED:         ${DPNP_INSTALL_STRUCTURED}")
-message(STATUS "DPNP_SYCL_QUEUE_MGR_ENABLE:      ${DPNP_SYCL_QUEUE_MGR_ENABLE}")
-message(STATUS " |- DPNP_QUEUEMGR_INCLUDE_DIR:   ${DPNP_QUEUEMGR_INCLUDE_DIR}")
-message(STATUS " |- DPNP_QUEUEMGR_LIB_DIR:       ${DPNP_QUEUEMGR_LIB_DIR}")
-message(STATUS "======= End of user controlled variables list ======")
-
-# -----------------------------------------------------------------------------------------------
-# Compiler-specific logic...
-# -----------------------------------------------------------------------------------------------
-
-# cmake 3.19.1 has a bug in dpcpp compiler detection. Let's assume it is a clang
-# set(CMAKE_CXX_COMPILER_ID "Clang")
-# set(CMAKE_CXX_COMPILER_VERSION 12.0)
-if (CMAKE_VERSION VERSION_EQUAL 3.19.1)
-    message(FATAL_ERROR
-      " Unsupported cmake version ${CMAKE_VERSION}\n"
-      " Please use other cmake version, for example:\n"
-      "in Linux:\n"
-      " curl --output cmake_webimage.tar.gz --url https://cmake.org/files/v3.19/cmake-3.19.2-Linux-x86_64.tar.gz --retry 5 --retry-delay 5\n"
-      " tar -xzf cmake_webimage.tar.gz\n"
-      " rm -f cmake_webimage.tar.gz\n"
-      " export PATH=`pwd`/cmake-3.19.2-Linux-x86_64/bin:$PATH\n"
-      "in Windows:\n"
-      " curl.exe --output cmake_webimage.zip --url https://cmake.org/files/v3.19/cmake-3.19.2-win64-x64.zip --retry 5 --retry-delay 5\n"
-      " tar -xf cmake_webimage.zip\n"
-      " del cmake_webimage.zip\n"
-      " set PATH=%CD%\\cmake-3.19.2-win64-x64\\bin;%PATH%\n"
-        )
-endif()
-
-# SYCL related compile options
-string(CONCAT COMMON_COMPILE_FLAGS
-  "-fsycl "
-  "-fno-approx-func "
-  "-fno-finite-math-only "
-)
-string(CONCAT COMMON_LINK_FLAGS
-  "-fsycl "
-  "-fsycl-device-code-split=per_kernel "
-)
-if(UNIX)
-  set(CMAKE_CXX_COMPILER "icpx")
-  # add_compile_options(-fPIC)
-elseif(WIN32)
-  set(CMAKE_CXX_COMPILER "icx")
-  # set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld-link")
-  # set(CMAKE_LINKER "lld-link")
-  # include (Platform/Windows-Clang)
-  # set(CMAKE_LINKER "dpcpp")
-  # set(CMAKE_AR "llvm-ar")
-  # set(CMAKE_RANLIB "llvm-ranlib")
-  # set(CMAKE_CXX_FLAGS "/EHsc")
-
-  string(APPEND COMMON_COMPILE_FLAGS
-    "/EHsc "
-#    "/Ox "
-#    "/W3 "
-#    "/GL "
-#    "/DNDEBUG "
-#    "/MD "
-  )
-else()
-  message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in compiler selection case")
-endif()
-
-# set language version
-set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
-
-# warning flag set
-string(CONCAT DPNP_WARNING_FLAGS
-  "-W "
-  "-Wextra "
-  "-Wshadow "
-  "-Wall "
-  "-Wstrict-prototypes "
-  "-Wformat "
-  "-Wformat-security "
-)
-string(APPEND COMMON_COMPILE_FLAGS
-  "${DPNP_WARNING_FLAGS}"
-)
-
-# debug/release compile definitions
-if(DPNP_DEBUG_ENABLE)
-  set(CMAKE_BUILD_TYPE "Debug")
-  string(APPEND COMMON_COMPILE_FLAGS
-    "-O0 "
-    "-ggdb3 "
-  )
-  string(APPEND COMMON_LINK_FLAGS
-    "-O0 "
-    "-ggdb3 "
-    "-fsycl-link-huge-device-code "
-  )
-else()
-  set(CMAKE_BUILD_TYPE "Release")
-  string(APPEND COMMON_COMPILE_FLAGS
-    "-O3 "
-  )
-endif()
-
-# -----------------------------------------------------------------------------------------------
-# Auxilary building options...
-# -----------------------------------------------------------------------------------------------
-# sdl
-string(CONCAT DPNP_DEFS
-  "-D_FORTIFY_SOURCE=2 "
-)
-if(NOT WIN32)
-  string(APPEND COMMON_COMPILE_FLAGS
-    "-fno-delete-null-pointer-checks "
-    "-fstack-protector-strong "
-    "-fno-strict-overflow "
-    "-fwrapv "
-    )
-  string(APPEND COMMON_LINK_FLAGS
-    "LINKER:-z,noexecstack,-z,relro,-z,now "
-  )
-endif()
-
-# disable PSTL policies due to compiler bug
-string(APPEND DPNP_DEFS
-  "-DPSTL_USE_PARALLEL_POLICIES=0 "
-  "-D_GLIBCXX_USE_TBB_PAR_BACKEND=0 "
-)
-
-# disable PSTL predefined policies objects (global queues, prevent fail on Windows)
-string(APPEND DPNP_DEFS
-  "-DONEDPL_USE_PREDEFINED_POLICIES=0 "
-)
-
-# -----------------------------------------------------------------------------------------------
-# Create project...
-# -----------------------------------------------------------------------------------------------
-# set(CMAKE_CXX_COMPILER "clang++")
-project(dpnp_project
-        # VERSION ${DPNP_VERSION}
-        DESCRIPTION "DPNP: NumPy-like API accelerated with SYCL"
-        HOMEPAGE_URL https://github.com/IntelPython/dpnp
-        LANGUAGES CXX)
-# set(CMAKE_CXX_COMPILER "dpcpp")
-
-# -----------------------------------------------------------------------------------------------
-# Building logic...
-# -----------------------------------------------------------------------------------------------
 set(DPNP_SRC
     kernels/dpnp_krnl_arraycreation.cpp
     kernels/dpnp_krnl_bitwise.cpp
@@ -228,127 +47,49 @@ set(DPNP_SRC
     src/dpnp_random_state.cpp
     )
 
-if(DPNP_STATIC_LIB_ENABLE)
-  add_library(dpnp_backend_c STATIC ${DPNP_SRC})
-else()
-  add_library(dpnp_backend_c SHARED ${DPNP_SRC})
-  set_target_properties(dpnp_backend_c PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON)
-endif()
+find_package(MKL REQUIRED)
+find_package(oneDPL REQUIRED)
 
-target_include_directories(dpnp_backend_c PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(dpnp_backend_c PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
+set(_trgt "dpnp_backend_c")
 
-string(REPLACE " " ";" COMMON_COMPILE_FLAGS_AS_LIST ${COMMON_COMPILE_FLAGS})
-target_compile_options(dpnp_backend_c PUBLIC ${COMMON_COMPILE_FLAGS_AS_LIST})
-string(REPLACE " " ";" DPNP_DEFS_AS_LIST ${DPNP_DEFS})
-target_compile_definitions(dpnp_backend_c PUBLIC ${DPNP_DEFS_AS_LIST})
-string(REPLACE " " ";" COMMON_LINK_FLAGS_AS_LIST ${COMMON_LINK_FLAGS})
-target_link_options(dpnp_backend_c PUBLIC ${COMMON_LINK_FLAGS_AS_LIST})
+add_library(${_trgt} SHARED ${DPNP_SRC})  # backend is built as a shared library
+set_target_properties(${_trgt} PROPERTIES POSITION_INDEPENDENT_CODE ON)  # target property name has no CMAKE_ prefix
 
+target_include_directories(${_trgt} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_include_directories(${_trgt} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
 
-# -----------------------------------------------------------------------------------------------
-# Testing logic...
-# -----------------------------------------------------------------------------------------------
-if(DPNP_BACKEND_TESTS)
-  add_subdirectory(tests)
-endif()
-
-# -----------------------------------------------------------------------------------------------
-# Dependencies logic...
-# -----------------------------------------------------------------------------------------------
-# Math library
-find_package(MathLib REQUIRED)
-target_compile_definitions(dpnp_backend_c PUBLIC -DMKL_ILP64=1)
-target_include_directories(dpnp_backend_c PUBLIC ${MATHLIB_INCLUDE_DIR})
-
-link_directories(dpnp_backend_c PUBLIC ${MATHLIB_LIBRARY_DIR}) # does not work with some cmake versions
-target_link_directories(dpnp_backend_c PUBLIC ${MATHLIB_LIBRARY_DIR}) # duplicate link_directories
-
+target_link_options(${_trgt} PUBLIC -fsycl-device-code-split=per_kernel)
 if(UNIX)
-# Link Line Advisor v6.13
-# -DMKL_ILP64 -I"${MKLROOT}/include"
-# -lmkl_sycl -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl
-  set(DPNP_MATHLIB_DEP_LIBS
-      mkl_sycl
-      mkl_intel_ilp64
-      mkl_tbb_thread # mkl_sequential
-      mkl_core
-      sycl
-      OpenCL
-      pthread
-      m
-      dl
-      CACHE STRING "Set of libraries to link")
-elseif(WIN32)
-# Link Line Advisor v6.13
-# -DMKL_ILP64 -I"%MKLROOT%\include"
-# mkl_sycl_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib sycl.lib OpenCL.lib
-  set(DPNP_MATHLIB_DEP_LIBS
-      mkl_sycl_dll
-      mkl_intel_ilp64_dll
-      mkl_tbb_thread_dll # mkl_sequential_dll
-      mkl_core_dll
-      sycl
-      OpenCL
-      CACHE STRING "Set of libraries to link")
-else()
-  message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in MathLib libraries set")
+    # this option is supported on Linux only
+    target_link_options(${_trgt} PUBLIC -fsycl-link-huge-device-code)
 endif()
 
-target_link_libraries(dpnp_backend_c PUBLIC ${DPNP_MATHLIB_DEP_LIBS})
-
-# Parallel STL
-find_package(DPL REQUIRED)
-target_include_directories(dpnp_backend_c PUBLIC ${DPL_INCLUDE_DIR})
-
-# SYCL queue manager
-if(DPNP_SYCL_QUEUE_MGR_ENABLE)
-    target_include_directories(dpnp_backend_c PUBLIC ${DPNP_QUEUEMGR_INCLUDE_DIR})
-    target_link_directories(dpnp_backend_c PUBLIC ${DPNP_QUEUEMGR_LIB_DIR})
-    target_link_libraries(dpnp_backend_c PUBLIC "DPCTLSyclInterface")
-
-    # not sure but add runpath
-    set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${DPNP_QUEUEMGR_LIB_DIR}")
-
-    # disable stripping rpath in installation logic
-    set_target_properties(dpnp_backend_c PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
-else()
-    target_compiler_definitions(dpnp_backend_c PUBLIC -DDPNP_LOCAL_QUEUE=1)
-endif()
-
-# -----------------------------------------------------------------------------------------------
-# Installation logic...
-# -----------------------------------------------------------------------------------------------
-set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/package_dpnp)
-
-if(DEFINED DPNP_INSTALL_PREFIX)
-  set(CMAKE_INSTALL_PREFIX ${DPNP_INSTALL_PREFIX})
-endif()
-
-if(NOT DPNP_INSTALL_STRUCTURED)
-  set(CMAKE_INSTALL_BINDIR ${CMAKE_INSTALL_PREFIX})
-  set(CMAKE_INSTALL_LIBDIR ${CMAKE_INSTALL_PREFIX})
-  set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX})
-endif()
-
-# set_target_properties(dpnp_backend_c PROPERTIES VERSION ${DPNP_VERSION} SOVERSION ${DPNP_API_VERSION})
-
-install(TARGETS dpnp_backend_c
-        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
-        )
-
-# -----------------------------------------------------------------------------------------------
-# Print Global Options
-# -----------------------------------------------------------------------------------------------
-message(STATUS "CMAKE_SYSTEM:                    ${CMAKE_SYSTEM}")
-message(STATUS "CMAKE_SYSTEM_VERSION:            ${CMAKE_SYSTEM_VERSION}")
-message(STATUS "CMAKE_SYSTEM_PROCESSOR:          ${CMAKE_SYSTEM_PROCESSOR}")
-message(STATUS "CMAKE_BUILD_TYPE:                ${CMAKE_BUILD_TYPE}")
-message(STATUS "CXX_STANDARD:                    ${CMAKE_CXX_STANDARD}")
-message(STATUS "CMAKE_CXX_COMPILER_ID:           ${CMAKE_CXX_COMPILER_ID}")
-message(STATUS "CMAKE_CXX_COMPILER_VERSION:      ${CMAKE_CXX_COMPILER_VERSION}")
-message(STATUS "CMAKE_CXX_COMPILER:              ${CMAKE_CXX_COMPILER}")
-message(STATUS "CMAKE_LINKER:                    ${CMAKE_LINKER}")
-message(STATUS "CMAKE_SOURCE_DIR:                ${CMAKE_SOURCE_DIR}")
-message(STATUS "DPNP_INSTALL_PREFIX:             ${CMAKE_INSTALL_PREFIX}")
-message(STATUS "CMAKE_VERBOSE_MAKEFILE:          ${CMAKE_VERBOSE_MAKEFILE}")
+target_link_libraries(${_trgt} PUBLIC MKL::MKL_DPCPP)
+target_link_libraries(${_trgt} PUBLIC oneDPL)
+
+# needed for STL headers with GCC < 11
+target_compile_definitions(${_trgt} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0)
+target_compile_definitions(${_trgt} PUBLIC PSTL_USE_PARALLEL_POLICIES=0)
+# work-around for Windows at exit crash with predefined policies
+target_compile_definitions(${_trgt} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0)
+
+target_include_directories(${_trgt} PUBLIC ${Dpctl_INCLUDE_DIR})
+target_link_directories(${_trgt} PUBLIC "${Dpctl_INCLUDE_DIR}/..")
+target_link_libraries(${_trgt} PUBLIC DPCTLSyclInterface)
+
+add_library(dpnp_backend_library INTERFACE IMPORTED GLOBAL)
+target_include_directories(dpnp_backend_library BEFORE INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/src)
+target_link_libraries(dpnp_backend_library INTERFACE ${_trgt})
+
+install(
+  TARGETS ${_trgt}
+  LIBRARY DESTINATION dpnp
+  PERMISSIONS
+  OWNER_WRITE
+  OWNER_READ
+  OWNER_EXECUTE
+  GROUP_READ
+  GROUP_EXECUTE
+  WORLD_READ
+  WORLD_EXECUTE
+  )
diff --git a/dpnp/backend/cmake/Modules/FindDPL.cmake b/dpnp/backend/cmake/Modules/FindDPL.cmake
deleted file mode 100644
index a5301e6fda97..000000000000
--- a/dpnp/backend/cmake/Modules/FindDPL.cmake
+++ /dev/null
@@ -1,53 +0,0 @@
-# *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-# The following variables are optionally searched for defaults
-#  DPLROOT:         Environment variable to specify custom search place
-#  ONEAPI_ROOT:     Environment variable to specify search place from oneAPI
-#
-# The following are set after configuration is done:
-#  DPL_FOUND
-#  DPL_INCLUDE_DIR
-
-include(FindPackageHandleStandardArgs)
-
-set(DPNP_ONEAPI_DPL "$ENV{DPNP_ONEAPI_ROOT}/dpl/latest" CACHE PATH "Folder contains DPL files from ONEAPI_ROOT")
-
-if(DEFINED ENV{DPLROOT})
-  set(DPNP_DPLROOT "$ENV{DPLROOT}" CACHE PATH "Folder contains DPL files from DPLROOT")
-endif()
-
-find_path(
-  DPL_INCLUDE_DIR oneapi/dpl/algorithm
-  HINTS ${DPNP_DPLROOT} ${DPNP_ONEAPI_DPL} ENV CONDA_PREFIX ENV PREFIX # search order is important
-  PATH_SUFFIXES include linux/include
-  DOC "Path to DPL include files")
-
-find_package_handle_standard_args(DPL DEFAULT_MSG DPL_INCLUDE_DIR)
-
-if(DPL_FOUND)
-  message(STATUS "Found DPL:                       (include: ${DPL_INCLUDE_DIR})")
-  # mark_as_advanced(DPNP_DPLROOT DPL_INCLUDE_DIR)
-endif()
diff --git a/dpnp/backend/cmake/Modules/FindMathLib.cmake b/dpnp/backend/cmake/Modules/FindMathLib.cmake
deleted file mode 100644
index a5b5e1602e2e..000000000000
--- a/dpnp/backend/cmake/Modules/FindMathLib.cmake
+++ /dev/null
@@ -1,75 +0,0 @@
-# *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-# The following variables are optionally searched for defaults
-#  MKLROOT:         Environment variable to specify custom search place
-#  ONEAPI_ROOT:     Environment variable to specify search place from oneAPI
-#
-# The following are set after configuration is done:
-#  MATHLIB_FOUND
-#  MATHLIB_INCLUDE_DIR
-#  MATHLIB_LIBRARY_DIR
-
-include(FindPackageHandleStandardArgs)
-
-set(DPNP_ONEAPI_MKL "$ENV{DPNP_ONEAPI_ROOT}/mkl/latest" CACHE PATH "Folder contains Math Lib files from ONEAPI_ROOT")
-
-if(DEFINED ENV{MKLROOT})
-  set(DPNP_MKLROOT "$ENV{MKLROOT}" CACHE PATH "Folder contains Math Lib files from MKLROOT")
-endif()
-
-if(UNIX)
-  set(MATHLIB_SYCL_LIB
-      ${CMAKE_SHARED_LIBRARY_PREFIX}mkl_sycl${CMAKE_SHARED_LIBRARY_SUFFIX}
-      CACHE PATH "SYCL math lib")
-elseif(WIN32)
-  set(MATHLIB_SYCL_LIB
-      mkl_sycl_dll${CMAKE_STATIC_LIBRARY_SUFFIX}
-      CACHE PATH "SYCL math lib")
-else()
-  message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in MATHLIB_SYCL_LIB selection")
-endif()
-
-find_path(
-  MATHLIB_INCLUDE_DIR oneapi/mkl.hpp
-  HINTS ${DPNP_MKLROOT} ${DPNP_ONEAPI_MKL} ENV CONDA_PREFIX ENV PREFIX # search order is important
-  PATH_SUFFIXES include include
-  DOC "Path to mathlib include files")
-
-find_path(
-  MATHLIB_LIBRARY_DIR ${MATHLIB_SYCL_LIB}
-  HINTS ${DPNP_MKLROOT} ${DPNP_ONEAPI_MKL} ENV CONDA_PREFIX ENV PREFIX # search order is important
-  PATH_SUFFIXES lib lib/intel64
-  DOC "Path to mathlib library files")
-
-# TODO implement recurcive searching file (GLOB_RECURSE MY_PATH "/opt/intel/*/mkl.hpp")
-# message(STATUS "+++++++++++++:(include: ${MY_PATH})")
-
-find_package_handle_standard_args(MathLib DEFAULT_MSG MATHLIB_INCLUDE_DIR MATHLIB_LIBRARY_DIR)
-
-if(MathLib_FOUND)
-  message(STATUS "Found MathLib:                   (include: ${MATHLIB_INCLUDE_DIR}, library: ${MATHLIB_LIBRARY_DIR})")
-  # mark_as_advanced(DPNP_MKLROOT MATHLIB_INCLUDE_DIR MATHLIB_LIBRARY_DIR)
-endif()
diff --git a/dpnp/backend/saved_old_CMakeLists.txt b/dpnp/backend/saved_old_CMakeLists.txt
new file mode 100644
index 000000000000..330eb1030f3d
--- /dev/null
+++ b/dpnp/backend/saved_old_CMakeLists.txt
@@ -0,0 +1,354 @@
+# *****************************************************************************
+# Copyright (c) 2016-2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# cmake-format -i CMakeLists.txt --line-width=120
+
+cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
+
+# set(DPNP_VERSION 0.11.1)
+# set(DPNP_API_VERSION 0.11)
+
+# set directory where the custom finders live
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules")
+
+# -----------------------------------------------------------------------------------------------
+# Project build options...
+# -----------------------------------------------------------------------------------------------
+if(DEFINED ENV{ONEAPI_ROOT})
+  set(DPNP_ONEAPI_ROOT "$ENV{ONEAPI_ROOT}" CACHE PATH "Folder contains oneapi tool set")
+endif()
+
+option(DPNP_STATIC_LIB_ENABLE "Enable build DPNP static library" FALSE)
+option(DPNP_DEBUG_ENABLE "Enable output for DPNP_DEBUG statements" FALSE)
+option(DPNP_INSTALL_STRUCTURED "if FALSE, install package files into same directory" TRUE)
+option(DPNP_SYCL_QUEUE_MGR_ENABLE "Use external manager for SYCL queue" FALSE)
+option(DPNP_BACKEND_TESTS "Enable DPNP tests" FALSE)
+
+if(DEFINED ENV{DPNP_DEBUG})
+  set(DPNP_DEBUG_ENABLE $ENV{DPNP_DEBUG})
+endif()
+
+message(STATUS "CMAKE_VERSION:                   ${CMAKE_VERSION}")
+message(STATUS "CMAKE_GENERATOR:                 ${CMAKE_GENERATOR}")
+message(STATUS "CMAKE_HOST_SYSTEM_NAME:          ${CMAKE_HOST_SYSTEM_NAME}")
+message(STATUS "========== User controlled variables list ==========")
+message(STATUS "DPNP_ONEAPI_ROOT:                ${DPNP_ONEAPI_ROOT}")
+message(STATUS "DPNP_STATIC_LIB_ENABLE:          ${DPNP_STATIC_LIB_ENABLE}")
+message(STATUS "DPNP_DEBUG_ENABLE:               ${DPNP_DEBUG_ENABLE}")
+message(STATUS "DPNP_BACKEND_TESTS:              ${DPNP_BACKEND_TESTS}")
+message(STATUS "DPNP_INSTALL_STRUCTURED:         ${DPNP_INSTALL_STRUCTURED}")
+message(STATUS "DPNP_SYCL_QUEUE_MGR_ENABLE:      ${DPNP_SYCL_QUEUE_MGR_ENABLE}")
+message(STATUS " |- DPNP_QUEUEMGR_INCLUDE_DIR:   ${DPNP_QUEUEMGR_INCLUDE_DIR}")
+message(STATUS " |- DPNP_QUEUEMGR_LIB_DIR:       ${DPNP_QUEUEMGR_LIB_DIR}")
+message(STATUS "======= End of user controlled variables list ======")
+
+# -----------------------------------------------------------------------------------------------
+# Compiler-specific logic...
+# -----------------------------------------------------------------------------------------------
+
+# cmake 3.19.1 has a bug in dpcpp compiler detection. Let's assume it is a clang
+# set(CMAKE_CXX_COMPILER_ID "Clang")
+# set(CMAKE_CXX_COMPILER_VERSION 12.0)
+if (CMAKE_VERSION VERSION_EQUAL 3.19.1)
+    message(FATAL_ERROR
+      " Unsupported cmake version ${CMAKE_VERSION}\n"
+      " Please use other cmake version, for example:\n"
+      "in Linux:\n"
+      " curl --output cmake_webimage.tar.gz --url https://cmake.org/files/v3.19/cmake-3.19.2-Linux-x86_64.tar.gz --retry 5 --retry-delay 5\n"
+      " tar -xzf cmake_webimage.tar.gz\n"
+      " rm -f cmake_webimage.tar.gz\n"
+      " export PATH=`pwd`/cmake-3.19.2-Linux-x86_64/bin:$PATH\n"
+      "in Windows:\n"
+      " curl.exe --output cmake_webimage.zip --url https://cmake.org/files/v3.19/cmake-3.19.2-win64-x64.zip --retry 5 --retry-delay 5\n"
+      " tar -xf cmake_webimage.zip\n"
+      " del cmake_webimage.zip\n"
+      " set PATH=%CD%\\cmake-3.19.2-win64-x64\\bin;%PATH%\n"
+        )
+endif()
+
+# SYCL related compile options
+string(CONCAT COMMON_COMPILE_FLAGS
+  "-fsycl "
+  "-fno-approx-func "
+  "-fno-finite-math-only "
+)
+string(CONCAT COMMON_LINK_FLAGS
+  "-fsycl "
+  "-fsycl-device-code-split=per_kernel "
+)
+if(UNIX)
+  set(CMAKE_CXX_COMPILER "icpx")
+  # add_compile_options(-fPIC)
+elseif(WIN32)
+  set(CMAKE_CXX_COMPILER "icx")
+  # set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld-link")
+  # set(CMAKE_LINKER "lld-link")
+  # include (Platform/Windows-Clang)
+  # set(CMAKE_LINKER "dpcpp")
+  # set(CMAKE_AR "llvm-ar")
+  # set(CMAKE_RANLIB "llvm-ranlib")
+  # set(CMAKE_CXX_FLAGS "/EHsc")
+
+  string(APPEND COMMON_COMPILE_FLAGS
+    "/EHsc "
+#    "/Ox "
+#    "/W3 "
+#    "/GL "
+#    "/DNDEBUG "
+#    "/MD "
+  )
+else()
+  message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in compiler selection case")
+endif()
+
+# set language version
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# warning flag set
+string(CONCAT DPNP_WARNING_FLAGS
+  "-W "
+  "-Wextra "
+  "-Wshadow "
+  "-Wall "
+  "-Wstrict-prototypes "
+  "-Wformat "
+  "-Wformat-security "
+)
+string(APPEND COMMON_COMPILE_FLAGS
+  "${DPNP_WARNING_FLAGS}"
+)
+
+# debug/release compile definitions
+if(DPNP_DEBUG_ENABLE)
+  set(CMAKE_BUILD_TYPE "Debug")
+  string(APPEND COMMON_COMPILE_FLAGS
+    "-O0 "
+    "-ggdb3 "
+  )
+  string(APPEND COMMON_LINK_FLAGS
+    "-O0 "
+    "-ggdb3 "
+    "-fsycl-link-huge-device-code "
+  )
+else()
+  set(CMAKE_BUILD_TYPE "Release")
+  string(APPEND COMMON_COMPILE_FLAGS
+    "-O3 "
+  )
+endif()
+
+# -----------------------------------------------------------------------------------------------
+# Auxiliary building options...
+# -----------------------------------------------------------------------------------------------
+# sdl
+string(CONCAT DPNP_DEFS
+  "-D_FORTIFY_SOURCE=2 "
+)
+if(NOT WIN32)
+  string(APPEND COMMON_COMPILE_FLAGS
+    "-fno-delete-null-pointer-checks "
+    "-fstack-protector-strong "
+    "-fno-strict-overflow "
+    "-fwrapv "
+    )
+  string(APPEND COMMON_LINK_FLAGS
+    "LINKER:-z,noexecstack,-z,relro,-z,now "
+  )
+endif()
+
+# disable PSTL policies due to compiler bug
+string(APPEND DPNP_DEFS
+  "-DPSTL_USE_PARALLEL_POLICIES=0 "
+  "-D_GLIBCXX_USE_TBB_PAR_BACKEND=0 "
+)
+
+# disable PSTL predefined policies objects (global queues, prevent fail on Windows)
+string(APPEND DPNP_DEFS
+  "-DONEDPL_USE_PREDEFINED_POLICIES=0 "
+)
+
+# -----------------------------------------------------------------------------------------------
+# Create project...
+# -----------------------------------------------------------------------------------------------
+# set(CMAKE_CXX_COMPILER "clang++")
+project(dpnp_project
+        # VERSION ${DPNP_VERSION}
+        DESCRIPTION "DPNP: NumPy-like API accelerated with SYCL"
+        HOMEPAGE_URL https://github.com/IntelPython/dpnp
+        LANGUAGES CXX)
+# set(CMAKE_CXX_COMPILER "dpcpp")
+
+# -----------------------------------------------------------------------------------------------
+# Building logic...
+# -----------------------------------------------------------------------------------------------
+set(DPNP_SRC
+    kernels/dpnp_krnl_arraycreation.cpp
+    kernels/dpnp_krnl_bitwise.cpp
+    kernels/dpnp_krnl_common.cpp
+    kernels/dpnp_krnl_elemwise.cpp
+    kernels/dpnp_krnl_fft.cpp
+    kernels/dpnp_krnl_indexing.cpp
+    kernels/dpnp_krnl_linalg.cpp
+    kernels/dpnp_krnl_logic.cpp
+    kernels/dpnp_krnl_manipulation.cpp
+    kernels/dpnp_krnl_mathematical.cpp
+    kernels/dpnp_krnl_random.cpp
+    kernels/dpnp_krnl_reduction.cpp
+    kernels/dpnp_krnl_searching.cpp
+    kernels/dpnp_krnl_sorting.cpp
+    kernels/dpnp_krnl_statistics.cpp
+    src/constants.cpp
+    src/dpnp_iface_fptr.cpp
+    src/memory_sycl.cpp
+    src/queue_sycl.cpp
+    src/verbose.cpp
+    src/dpnp_random_state.cpp
+    )
+
+if(DPNP_STATIC_LIB_ENABLE)  # choose static or shared backend library
+  add_library(dpnp_backend_c STATIC ${DPNP_SRC})
+else()
+  add_library(dpnp_backend_c SHARED ${DPNP_SRC})
+  set_target_properties(dpnp_backend_c PROPERTIES POSITION_INDEPENDENT_CODE ON)  # target property name has no CMAKE_ prefix
+endif()
+
+target_include_directories(dpnp_backend_c PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_include_directories(dpnp_backend_c PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
+
+string(REPLACE " " ";" COMMON_COMPILE_FLAGS_AS_LIST ${COMMON_COMPILE_FLAGS})
+target_compile_options(dpnp_backend_c PUBLIC ${COMMON_COMPILE_FLAGS_AS_LIST})
+string(REPLACE " " ";" DPNP_DEFS_AS_LIST ${DPNP_DEFS})
+target_compile_definitions(dpnp_backend_c PUBLIC ${DPNP_DEFS_AS_LIST})
+string(REPLACE " " ";" COMMON_LINK_FLAGS_AS_LIST ${COMMON_LINK_FLAGS})
+target_link_options(dpnp_backend_c PUBLIC ${COMMON_LINK_FLAGS_AS_LIST})
+
+
+# -----------------------------------------------------------------------------------------------
+# Testing logic...
+# -----------------------------------------------------------------------------------------------
+if(DPNP_BACKEND_TESTS)
+  add_subdirectory(tests)
+endif()
+
+# -----------------------------------------------------------------------------------------------
+# Dependencies logic...
+# -----------------------------------------------------------------------------------------------
+# Math library
+find_package(MathLib REQUIRED)
+target_compile_definitions(dpnp_backend_c PUBLIC -DMKL_ILP64=1)
+target_include_directories(dpnp_backend_c PUBLIC ${MATHLIB_INCLUDE_DIR})
+
+link_directories(dpnp_backend_c PUBLIC ${MATHLIB_LIBRARY_DIR}) # does not work with some cmake versions
+target_link_directories(dpnp_backend_c PUBLIC ${MATHLIB_LIBRARY_DIR}) # duplicate link_directories
+
+if(UNIX)
+# Link Line Advisor v6.13
+# -DMKL_ILP64 -I"${MKLROOT}/include"
+# -lmkl_sycl -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl
+  set(DPNP_MATHLIB_DEP_LIBS
+      mkl_sycl
+      mkl_intel_ilp64
+      mkl_tbb_thread # mkl_sequential
+      mkl_core
+      sycl
+      OpenCL
+      pthread
+      m
+      dl
+      CACHE STRING "Set of libraries to link")
+elseif(WIN32)
+# Link Line Advisor v6.13
+# -DMKL_ILP64 -I"%MKLROOT%\include"
+# mkl_sycl_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib sycl.lib OpenCL.lib
+  set(DPNP_MATHLIB_DEP_LIBS
+      mkl_sycl_dll
+      mkl_intel_ilp64_dll
+      mkl_tbb_thread_dll # mkl_sequential_dll
+      mkl_core_dll
+      sycl
+      OpenCL
+      CACHE STRING "Set of libraries to link")
+else()
+  message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in MathLib libraries set")
+endif()
+
+target_link_libraries(dpnp_backend_c PUBLIC ${DPNP_MATHLIB_DEP_LIBS})
+
+# Parallel STL
+find_package(DPL REQUIRED)
+target_include_directories(dpnp_backend_c PUBLIC ${DPL_INCLUDE_DIR})
+
+# SYCL queue manager
+if(DPNP_SYCL_QUEUE_MGR_ENABLE)  # use the external queue manager headers and library
+    target_include_directories(dpnp_backend_c PUBLIC ${DPNP_QUEUEMGR_INCLUDE_DIR})
+    target_link_directories(dpnp_backend_c PUBLIC ${DPNP_QUEUEMGR_LIB_DIR})
+    target_link_libraries(dpnp_backend_c PUBLIC "DPCTLSyclInterface")
+
+    # not sure but add runpath
+    set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${DPNP_QUEUEMGR_LIB_DIR}")
+
+    # disable stripping rpath in installation logic
+    set_target_properties(dpnp_backend_c PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
+else()
+    target_compile_definitions(dpnp_backend_c PUBLIC -DDPNP_LOCAL_QUEUE=1)  # define DPNP_LOCAL_QUEUE when the external queue manager is disabled
+endif()
+
+# -----------------------------------------------------------------------------------------------
+# Installation logic...
+# -----------------------------------------------------------------------------------------------
+set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/package_dpnp)
+
+if(DEFINED DPNP_INSTALL_PREFIX)
+  set(CMAKE_INSTALL_PREFIX ${DPNP_INSTALL_PREFIX})
+endif()
+
+if(NOT DPNP_INSTALL_STRUCTURED)
+  set(CMAKE_INSTALL_BINDIR ${CMAKE_INSTALL_PREFIX})
+  set(CMAKE_INSTALL_LIBDIR ${CMAKE_INSTALL_PREFIX})
+  set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX})
+endif()
+
+# set_target_properties(dpnp_backend_c PROPERTIES VERSION ${DPNP_VERSION} SOVERSION ${DPNP_API_VERSION})
+
+install(TARGETS dpnp_backend_c
+        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
+        )
+
+# -----------------------------------------------------------------------------------------------
+# Print Global Options
+# -----------------------------------------------------------------------------------------------
+message(STATUS "CMAKE_SYSTEM:                    ${CMAKE_SYSTEM}")
+message(STATUS "CMAKE_SYSTEM_VERSION:            ${CMAKE_SYSTEM_VERSION}")
+message(STATUS "CMAKE_SYSTEM_PROCESSOR:          ${CMAKE_SYSTEM_PROCESSOR}")
+message(STATUS "CMAKE_BUILD_TYPE:                ${CMAKE_BUILD_TYPE}")
+message(STATUS "CXX_STANDARD:                    ${CMAKE_CXX_STANDARD}")
+message(STATUS "CMAKE_CXX_COMPILER_ID:           ${CMAKE_CXX_COMPILER_ID}")
+message(STATUS "CMAKE_CXX_COMPILER_VERSION:      ${CMAKE_CXX_COMPILER_VERSION}")
+message(STATUS "CMAKE_CXX_COMPILER:              ${CMAKE_CXX_COMPILER}")
+message(STATUS "CMAKE_LINKER:                    ${CMAKE_LINKER}")
+message(STATUS "CMAKE_SOURCE_DIR:                ${CMAKE_SOURCE_DIR}")
+message(STATUS "DPNP_INSTALL_PREFIX:             ${CMAKE_INSTALL_PREFIX}")
+message(STATUS "CMAKE_VERBOSE_MAKEFILE:          ${CMAKE_VERBOSE_MAKEFILE}")
diff --git a/dpnp/dpnp_algo/CMakeLists.txt b/dpnp/dpnp_algo/CMakeLists.txt
new file mode 100644
index 000000000000..9f2921b53d55
--- /dev/null
+++ b/dpnp/dpnp_algo/CMakeLists.txt
@@ -0,0 +1,25 @@
+
+set(dpnp_algo_pyx_deps
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_linearalgebra.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_manipulation.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_counting.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_statistics.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_trigonometric.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_sorting.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_arraycreation.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_mathematical.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_searching.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_indexing.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_logic.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_bitwise.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_special.pyx
+  )
+
+build_dpnp_cython_ext_with_backend(
+  dpnp_algo
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo.pyx
+  dpnp/dpnp_algo
+  )
+
+add_custom_target(_dpnp_algo_deps DEPENDS ${dpnp_algo_pyx_deps})
+add_dependencies(dpnp_algo _dpnp_algo_deps)
diff --git a/dpnp/dpnp_utils/CMakeLists.txt b/dpnp/dpnp_utils/CMakeLists.txt
new file mode 100644
index 000000000000..75d403f74cfe
--- /dev/null
+++ b/dpnp/dpnp_utils/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Building dpnp_algo_utils Cython extension
+
+build_dpnp_cython_ext_with_backend(
+  dpnp_algo_utils
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_utils.pyx
+  dpnp/dpnp_utils
+  )
diff --git a/dpnp/fft/CMakeLists.txt b/dpnp/fft/CMakeLists.txt
new file mode 100644
index 000000000000..3b6146a2a854
--- /dev/null
+++ b/dpnp/fft/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Building dpnp_algo_fft Cython extension
+
+build_dpnp_cython_ext_with_backend(
+  dpnp_algo_fft
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_fft.pyx
+  dpnp/fft
+  )
diff --git a/dpnp/linalg/CMakeLists.txt b/dpnp/linalg/CMakeLists.txt
new file mode 100644
index 000000000000..a04d5f3b64e9
--- /dev/null
+++ b/dpnp/linalg/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Building dpnp_algo_linalg Cython extension
+
+build_dpnp_cython_ext_with_backend(
+  dpnp_algo_linalg
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_linalg.pyx
+  dpnp/linalg
+  )
diff --git a/dpnp/random/CMakeLists.txt b/dpnp/random/CMakeLists.txt
new file mode 100644
index 000000000000..b7204a9d5c74
--- /dev/null
+++ b/dpnp/random/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Building dpnp_algo_random Cython extension
+
+build_dpnp_cython_ext_with_backend(
+  dpnp_algo_random
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_random.pyx
+  dpnp/random
+  )
diff --git a/setup.py b/setup.py
index 31befbc1b237..608dd8dd1c71 100644
--- a/setup.py
+++ b/setup.py
@@ -1,63 +1,6 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-""" NumPy is the fundamental package for array computing with Python.
-
-It provides:
-
-- a powerful N-dimensional array object
-- sophisticated (broadcasting) functions
-- tools for integrating C/C++ and Fortran code
-- useful linear algebra, Fourier transform, and random number capabilities
-- and much more
-
-"""
-
-import importlib.machinery as imm  # Python 3 is required
-import sys
+from skbuild import setup
 import os
-import dpctl
-import numpy
-
-from setuptools import setup, Extension
-from Cython.Build import cythonize
-from Cython.Compiler import Options as cython_options
-
-from utils.command_style import source_style
-from utils.command_clean import source_clean
-from utils.command_build_clib import custom_build_clib, dpnp_backend_c_description, _project_backend_dir, _sdl_cflags, _project_extra_link_args, IS_WIN
-from utils.command_build_cmake_clib import custom_build_cmake_clib
-
-
-"""
-Python version check
-"""
-if sys.version_info[:2] < (3, 6):
-    raise RuntimeError("DPNP: Python version >= 3.6 required.")
+import importlib.machinery as imm
 
 
 """
@@ -67,175 +10,22 @@
 version_mod = imm.SourceFileLoader('version', os.path.join(thefile_path, 'dpnp', 'version.py')).load_module()
 __version__ = version_mod.__version__
 
-
-"""
-Set project auxilary data like readme and licence files
-"""
-with open('README.md') as f:
-    __readme_file__ = f.read()
-
-CLASSIFIERS = """\
-Development Status :: 4 - Beta
-Intended Audience :: Science/Research
-Intended Audience :: Developers
-License :: OSI Approved
-Programming Language :: C
-Programming Language :: Python
-Programming Language :: Python :: 3
-Programming Language :: Python :: 3.6
-Programming Language :: Python :: 3.7
-Programming Language :: Python :: 3.8
-Programming Language :: Python :: Implementation :: CPython
-Topic :: Software Development
-Topic :: Scientific/Engineering
-Operating System :: Microsoft :: Windows
-Operating System :: POSIX
-Operating System :: Unix
-Operating System :: MacOS
-"""
-
-"""
-Extra defined commands for the build system
-
->$ python ./setup.py --help-commands
-
->$ python ./setup.py style
->$ python ./setup.py style -a
->$ python ./setup.py clean
-
-TODO: spell check, valgrind, code coverage
-"""
-
-# TODO: refactor/fix
-# on Win we need a specific build_clib definition to prevent using cmake during build_ext execution
-if IS_WIN:
-    dpnp_build_commands = {'style': source_style,
-                           'build_clib_setuptools': custom_build_clib,
-                           'build_clib': custom_build_clib,
-                           'clean': source_clean
-                           }
-else:
-    dpnp_build_commands = {'style': source_style,
-                           'build_clib_setuptools': custom_build_clib,
-                           'build_clib': custom_build_cmake_clib,
-                           'clean': source_clean
-                           }
-
-if IS_WIN:
-    '''
-    This variable controls setuptools execution on windows
-    to avoid automatically search and confirm workability of the compiler
-    If not set, error "Microsoft Visual C++ 14.0 or greater is required." appears
-    '''
-    os.environ["DISTUTILS_USE_SDK"] = "1"
-
-"""
-The project modules description
-"""
-kwargs_common = {
-    "include_dirs": [numpy.get_include(), dpctl.get_include()] + _project_backend_dir,
-    "library_dirs": [os.path.dirname(dpctl.get_include()),],
-    "libraries": ["DPCTLSyclInterface"],
-    "extra_compile_args": _sdl_cflags,
-    "extra_link_args": _project_extra_link_args,
-    "define_macros": [("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
-    "language": "c++"
-}
-
-dpnp_algo = Extension(
-    name="dpnp.dpnp_algo.dpnp_algo",
-    sources=[os.path.join("dpnp", "dpnp_algo", "dpnp_algo.pyx")],
-    depends=[
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_linearalgebra.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_manipulation.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_counting.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_statistics.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_trigonometric.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_sorting.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_arraycreation.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_mathematical.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_searching.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_indexing.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_logic.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_bitwise.pyx"),
-        os.path.join("dpnp", "dpnp_algo", "dpnp_algo_special.pyx"),
+setup(
+    name="dpnp",
+    version=__version__,
+    description="",
+    long_description="",
+    long_description_content_type="text/markdown",
+    license="Apache 2.0",
+    author="Intel Corporation",
+    url="https://github.com/IntelPython/dpnp",
+    packages=['dpnp',
+              'dpnp.dpnp_algo',
+              'dpnp.dpnp_utils',
+              'dpnp.fft',
+              'dpnp.linalg',
+              'dpnp.random'
     ],
-    **kwargs_common)
-
-dpnp_dparray = Extension(
-    name="dpnp.dparray",
-    sources=[os.path.join("dpnp", "dparray.pyx")],
-    **kwargs_common)
-
-dpnp_random = Extension(
-    name="dpnp.random.dpnp_algo_random",
-    sources=[os.path.join("dpnp", "random", "dpnp_algo_random.pyx")],
-    **kwargs_common)
-
-dpnp_linalg = Extension(
-    name="dpnp.linalg.dpnp_algo_linalg",
-    sources=[os.path.join("dpnp", "linalg", "dpnp_algo_linalg.pyx")],
-    **kwargs_common)
-
-dpnp_fft = Extension(
-    name="dpnp.fft.dpnp_algo_fft",
-    sources=[os.path.join("dpnp", "fft", "dpnp_algo_fft.pyx")],
-    **kwargs_common)
-
-dpnp_utils = Extension(
-    name="dpnp.dpnp_utils.dpnp_algo_utils",
-    sources=[os.path.join("dpnp", "dpnp_utils", "dpnp_algo_utils.pyx")],
-    **kwargs_common)
-
-cython_options.docstrings = True
-cython_options.warning_errors = True
-
-dpnp_cython_mods = cythonize([dpnp_algo, dpnp_dparray, dpnp_random, dpnp_utils, dpnp_linalg, dpnp_fft],
-                             compiler_directives={"language_level": sys.version_info[0],
-                                                  "warn.unused": False,
-                                                  "warn.unused_result": False,
-                                                  "warn.maybe_uninitialized": False,
-                                                  "warn.undeclared": False,
-                                                  "boundscheck": True,
-                                                  "linetrace": True
-                                                  },
-                             gdb_debug=False,
-                             build_dir="build_cython",
-                             annotate=False,
-                             quiet=False)
-
-setup(name="dpnp",
-      version=__version__,
-      description="NumPy-like API accelerated with SYCL",
-      long_description=__readme_file__,
-      long_description_content_type="text/markdown",
-      author="Intel Corporation",
-      maintainer="Intel Corp.",
-      maintainer_email="scripting@intel.com",
-      url="https://intelpython.github.io/dpnp/",
-      download_url="https://github.com/IntelPython/dpnp",
-      license='BSD',
-      classifiers=[_f for _f in CLASSIFIERS.split('\n') if _f],
-      keywords="sycl numpy python3 intel mkl oneapi gpu dpcpp pstl",
-      platforms=["Linux", "Windows"],
-      test_suite="pytest",
-      python_requires=">=3.6",
-      install_requires=["numpy>=1.15"],
-      setup_requires=["numpy>=1.15"],
-      tests_require=["numpy>=1.15"],
-      ext_modules=dpnp_cython_mods,
-      cmdclass=dpnp_build_commands,
-      packages=['dpnp',
-                'dpnp.dpnp_algo',
-                'dpnp.dpnp_utils',
-                'dpnp.fft',
-                'dpnp.linalg',
-                'dpnp.random'
-                ],
-      package_data={'dpnp': ['libdpnp_backend_c.so', 'dpnp_backend_c.lib', 'dpnp_backend_c.dll']},
-      include_package_data=True,
-
-      # this is needed for 'build' command to automatically call 'build_clib'
-      # it attach the library to all extensions (it is not needed)
-      libraries=dpnp_backend_c_description
-      )
+    package_data={'dpnp': ['libdpnp_backend_c.so', 'dpnp_backend_c.lib', 'dpnp_backend_c.dll']},
+    include_package_data=True,
+)
diff --git a/utils/command_build_clib.py b/utils/command_build_clib.py
deleted file mode 100644
index d16bab3aec4a..000000000000
--- a/utils/command_build_clib.py
+++ /dev/null
@@ -1,427 +0,0 @@
-# -*- coding: utf-8 -*-
-# *****************************************************************************
-# Copyright (c) 2016-2023, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-"""Module to customize build_clib command
-Originally, 'build_clib' command produce static C library only.
-This modification add:
- - build shared C library
- - copy this library to the project tree
- - a check if source needs to be rebuilt based on time stamp
- - a check if librayr needs to be rebuilt based on time stamp
-"""
-
-import os
-import sys
-
-from ctypes.util import find_library as find_shared_lib
-from setuptools.command import build_clib
-from distutils import log
-from distutils.dep_util import newer_group
-from distutils.file_util import copy_file
-
-from utils.dpnp_build_utils import find_cmplr, find_dpl, find_mathlib, find_python_env
-
-IS_WIN = False
-IS_MAC = False
-IS_LIN = False
-
-if 'linux' in sys.platform:
-    IS_LIN = True
-elif sys.platform == 'darwin':
-    IS_MAC = True
-elif sys.platform in ['win32', 'cygwin']:
-    IS_WIN = True
-else:
-    raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-"""
-Set compiler for the project
-"""
-# default variables (for Linux)
-_project_compiler = "icpx"
-_project_linker = "icpx"
-_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func", "-fno-finite-math-only"]
-_project_cmplr_flag_sycl = ["-fsycl"]
-_project_cmplr_flag_stdcpp_static = []  # This brakes TBB ["-static-libstdc++", "-static-libgcc"]
-_project_cmplr_flag_compatibility = ["-Wl,--enable-new-dtags"]
-_project_cmplr_flag_lib = ["-shared"]
-_project_cmplr_flag_release_build = ["-O3", "-DNDEBUG", "-fPIC"]
-_project_cmplr_flag_debug_build = ["-g", "-O1", "-W", "-Wextra", "-Wshadow", "-Wall", "-Wstrict-prototypes", "-fPIC"]
-_project_cmplr_flag_default_build = []
-_project_cmplr_macro = []
-_project_force_build = False
-_project_sycl_queue_control_macro = [("DPNP_LOCAL_QUEUE", "1")]
-_project_rpath = ["$ORIGIN", os.path.join("$ORIGIN", "..")]
-_dpctrl_include = []
-_dpctrl_libpath = []
-_dpctrl_lib = []
-_sdl_cflags = ["-fstack-protector-strong",
-               "-fPIC", "-D_FORTIFY_SOURCE=2",
-               "-Wformat",
-               "-Wformat-security",
-               "-fno-strict-overflow",
-               "-fno-delete-null-pointer-checks"]
-_sdl_ldflags = ["-Wl,-z,noexecstack,-z,relro,-z,now"]
-
-# TODO remove when it will be fixed on TBB side. Details:
-# In GCC versions 9 and 10 the application that uses Parallel STL algorithms may fail to compile due to incompatible
-# interface changes between earlier versions of Intel TBB and oneTBB. Disable support for Parallel STL algorithms
-# by defining PSTL_USE_PARALLEL_POLICIES (in GCC 9), _GLIBCXX_USE_TBB_PAR_BACKEND (in GCC 10) macro to zero
-# before inclusion of the first standard header file in each translation unit.
-_project_cmplr_macro += [("PSTL_USE_PARALLEL_POLICIES", "0"), ("_GLIBCXX_USE_TBB_PAR_BACKEND", "0")]
-
-# disable PSTL predefined policies objects (global queues, prevent fail on Windows)
-_project_cmplr_macro += [("ONEDPL_USE_PREDEFINED_POLICIES", "0")]
-
-try:
-    """
-    Detect external SYCL queue handling library
-    """
-    import dpctl
-
-    _dpctrl_include += [dpctl.get_include()]
-    # _dpctrl_libpath = for package build + for local build
-    _dpctrl_libpath = ["$ORIGIN/../dpctl"] + [os.path.join(dpctl.get_include(), '..')]
-    _dpctrl_lib = ["DPCTLSyclInterface"]
-except ImportError:
-    """
-    Set local SYCL queue handler
-    """
-    _project_cmplr_macro += _project_sycl_queue_control_macro
-
-# other OS specific
-if IS_WIN:
-    _project_compiler = "dpcpp"
-    _project_linker = "lld-link"
-    _project_cmplr_flag_sycl = []
-    _project_cmplr_flag_stdcpp_static = []
-    _project_cmplr_flag_compatibility = []
-    _project_cmplr_flag_lib = ["/DLL"]
-    _project_cmplr_flag_release_build += _project_cmplr_flag_sycl_devel
-    _project_cmplr_macro += [("_WIN", "1")]
-    _project_rpath = []
-    # TODO this flag creates unexpected behavior during compilation, need to be fixed
-    # _sdl_cflags = ["-GS"]
-    _sdl_cflags = []
-    _sdl_ldflags = ["-NXCompat", "-DynamicBase"]
-
-"""
-Get the project build type
-"""
-__dpnp_debug__ = os.environ.get('DPNP_DEBUG', None)
-if __dpnp_debug__ is not None:
-    """
-    Debug configuration
-    """
-    _project_cmplr_flag_default_build = _project_cmplr_flag_debug_build
-else:
-    """
-    Release configuration
-    """
-    _project_cmplr_flag_sycl += _project_cmplr_flag_sycl_devel
-    _project_cmplr_flag_default_build = _project_cmplr_flag_release_build
-
-"""
-Get the math library environemnt
-"""
-_project_cmplr_macro += [("MKL_ILP64", "1")]  # using 64bit integers in MKL interface (long)
-if IS_LIN:
-    _mathlibs = ["mkl_sycl", "mkl_intel_ilp64", "mkl_sequential",
-                 "mkl_core", "sycl", "OpenCL", "pthread", "m", "dl"]
-elif IS_WIN:
-    _sycl_lib = None
-    for lib in {"sycl", "sycl6", "sycl7"}:
-        if find_shared_lib(lib):
-            _sycl_lib = lib
-    if not _sycl_lib:
-        raise EnvironmentError("DPNP: sycl library is not found")
-
-    _mathlibs = ["mkl_sycl_dll", "mkl_intel_ilp64_dll", "mkl_tbb_thread_dll", "mkl_core_dll", _sycl_lib, "OpenCL", "tbb"]
-
-"""
-Final set of arguments for extentions
-"""
-_project_extra_link_args = _project_cmplr_flag_compatibility + _project_cmplr_flag_stdcpp_static + \
-    ["-Wl,-rpath," + x for x in _project_rpath] + _sdl_ldflags
-_project_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
-_project_backend_dir = [os.path.join(_project_dir, "dpnp", "backend", "include"),
-                        os.path.join(_project_dir, "dpnp", "backend", "src")  # not a public headers location
-                        ]
-
-dpnp_backend_c_description = [
-    ["dpnp_backend_c",
-        {
-            "sources": [
-                "dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_bitwise.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_common.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_elemwise.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_fft.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_indexing.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_linalg.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_logic.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_manipulation.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_mathematical.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_random.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_reduction.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_searching.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_sorting.cpp",
-                "dpnp/backend/kernels/dpnp_krnl_statistics.cpp",
-                "dpnp/backend/src/dpnp_iface_fptr.cpp",
-                "dpnp/backend/src/memory_sycl.cpp",
-                "dpnp/backend/src/constants.cpp",
-                "dpnp/backend/src/queue_sycl.cpp",
-                "dpnp/backend/src/verbose.cpp",
-                "dpnp/backend/src/dpnp_random_state.cpp"
-            ],
-        }
-     ]
-]
-
-
-def _compiler_compile(self, sources,
-                      output_dir=None, macros=None, include_dirs=None, debug=0,
-                      extra_preargs=None, extra_postargs=None, depends=None):
-
-    if not self.initialized:
-        self.initialize()
-    compile_info = self._setup_compile(output_dir, macros, include_dirs,
-                                       sources, depends, extra_postargs)
-    macros, objects, extra_postargs, pp_opts, build = compile_info
-
-    compile_opts = extra_preargs or []
-    compile_opts.append('/c')
-    if debug:
-        compile_opts.extend(self.compile_options_debug)
-    else:
-        compile_opts.extend(self.compile_options)
-
-    add_cpp_opts = False
-
-    for obj in objects:
-        try:
-            src, ext = build[obj]
-        except KeyError:
-            continue
-        if debug:
-            # pass the full pathname to MSVC in debug mode,
-            # this allows the debugger to find the source file
-            # without asking the user to browse for it
-            src = os.path.abspath(src)
-
-        # Anaconda/conda-forge customisation, we want our pdbs to be
-        # relocatable:
-        # https://developercommunity.visualstudio.com/comments/623156/view.html
-        d1trimfile_opts = []
-        # if 'SRC_DIR' in os.environ:
-        # d1trimfile_opts.append("/d1trimfile:" + os.environ['SRC_DIR'])
-
-        if ext in self._c_extensions:
-            input_opt = "/Tc" + src
-        elif ext in self._cpp_extensions:
-            input_opt = "/Tp" + src
-            add_cpp_opts = True
-        elif ext in self._rc_extensions:
-            # compile .RC to .RES file
-            input_opt = src
-            output_opt = "/fo" + obj
-            try:
-                self.spawn([self.rc] + pp_opts + [output_opt, input_opt])
-            except DistutilsExecError as msg:
-                raise CompileError(msg)
-            continue
-        elif ext in self._mc_extensions:
-            # Compile .MC to .RC file to .RES file.
-            #   * '-h dir' specifies the directory for the
-            #     generated include file
-            #   * '-r dir' specifies the target directory of the
-            #     generated RC file and the binary message resource
-            #     it includes
-            #
-            # For now (since there are no options to change this),
-            # we use the source-directory for the include file and
-            # the build directory for the RC file and message
-            # resources. This works at least for win32all.
-            h_dir = os.path.dirname(src)
-            rc_dir = os.path.dirname(obj)
-            try:
-                # first compile .MC to .RC and .H file
-                self.spawn([self.mc, '-h', h_dir, '-r', rc_dir, src])
-                base, _ = os.path.splitext(os.path.basename(src))
-                rc_file = os.path.join(rc_dir, base + '.rc')
-                # then compile .RC to .RES file
-                self.spawn([self.rc, "/fo" + obj, rc_file])
-
-            except DistutilsExecError as msg:
-                raise CompileError(msg)
-            continue
-        else:
-            # how to handle this file?
-            raise CompileError("Don't know how to compile {} to {}"
-                               .format(src, obj))
-
-        args = [self.cc] + compile_opts + pp_opts + d1trimfile_opts
-        if add_cpp_opts:
-            args.append('/EHsc')
-        args.append(input_opt)
-        args.append("/Fo" + obj)
-        args.extend(extra_postargs)
-
-        try:
-            self.spawn(args)
-        except DistutilsExecError as msg:
-            raise CompileError(msg)
-
-    return objects
-
-
-class custom_build_clib(build_clib.build_clib):
-
-    def build_libraries(self, libraries):
-        """
-        This function is overloaded to the original function in build_clib.py file
-        """
-
-        for (lib_name, build_info) in libraries:
-            c_library_name = self.compiler.library_filename(lib_name, lib_type='shared')
-            c_library_filename = os.path.join(self.build_clib, c_library_name)
-            dest_filename = "dpnp"  # TODO need to fix destination directory
-
-            sources = build_info.get('sources')
-            if sources is None or not isinstance(sources, (list, tuple)):
-                err_msg = f"in 'libraries' option (library '{lib_name}'),"
-                err_msg += f" 'sources' must be present and must be a list of source filenames"
-                raise DistutilsSetupError(err_msg)
-
-            sources = list(sources)
-
-            log.info(f"DPNP: building {lib_name} library")
-
-            """
-            Get the compiler environemnt
-            """
-            _cmplr_include, _cmplr_libpath = find_cmplr(verbose=True)
-            _mathlib_include, _mathlib_path = find_mathlib(verbose=True)
-            # _, _omp_libpath = find_omp(verbose=True)
-            _dpl_include, _ = find_dpl(verbose=True)
-            _py_env_include, _py_env_lib = find_python_env(verbose=True)
-
-            macros = _project_cmplr_macro
-            include_dirs = _cmplr_include + _dpl_include + _mathlib_include + _project_backend_dir + _dpctrl_include + _py_env_include
-            libraries = _mathlibs + _dpctrl_lib
-            library_dirs = _mathlib_path + _dpctrl_libpath + _py_env_lib  # + _omp_libpath
-            runtime_library_dirs = _project_rpath + _dpctrl_libpath
-            extra_preargs = _project_cmplr_flag_sycl + _sdl_cflags
-            extra_link_postargs = _project_cmplr_flag_lib
-            extra_link_preargs = _project_cmplr_flag_compatibility + _sdl_ldflags
-            force_build = _project_force_build
-            compiler = [_project_compiler]
-            linker = [_project_linker]
-            default_flags = _project_cmplr_flag_default_build
-            language = "c++"
-
-            # set compiler and options
-            self.compiler.compiler_so = compiler + default_flags
-            self.compiler.compiler = self.compiler.compiler_so
-            self.compiler.compiler_cxx = self.compiler.compiler_so
-            self.compiler.linker_so = linker + default_flags
-            self.compiler.linker_exe = self.compiler.linker_so
-
-            os.environ["CC"] = _project_compiler
-
-            objects = []
-            """
-            Build object files from sources
-            """
-            if IS_WIN:
-                self.compiler.compile = _compiler_compile
-
-            for source_it in sources:
-                obj_file_list = self.compiler.object_filenames([source_it], strip_dir=0, output_dir=self.build_temp)
-                obj_file = "".join(obj_file_list)  # convert from list to file name
-
-                newer_than_obj = newer_group([source_it], obj_file, missing="newer")
-                if force_build or newer_than_obj:
-                    if IS_WIN:
-                        obj_file_list = self.compiler.compile(self.compiler,
-                                                              [source_it],
-                                                              output_dir=self.build_temp,
-                                                              macros=macros,
-                                                              include_dirs=include_dirs,
-                                                              extra_preargs=extra_preargs,
-                                                              debug=self.debug)
-                    else:
-                        obj_file_list = self.compiler.compile([source_it],
-                                                              output_dir=self.build_temp,
-                                                              macros=macros,
-                                                              include_dirs=include_dirs,
-                                                              extra_preargs=extra_preargs,
-                                                              debug=self.debug)
-                    objects.extend(obj_file_list)
-                else:
-                    objects.append(obj_file)
-
-            """
-            Build library file from objects
-            """
-            newer_than_lib = newer_group(objects, c_library_filename, missing="newer")
-            if force_build or newer_than_lib:
-                # TODO very brute way, need to refactor
-                if IS_WIN:
-                    link_command = " ".join(compiler)
-                    link_command += " " + " ".join(default_flags)
-                    link_command += " " + " ".join(objects)  # specify *.obj files
-                    link_command += " /link"  # start linker options
-                    link_command += " " + " ".join(extra_link_preargs)
-                    link_command += " " + ".lib ".join(libraries) + ".lib"  # libraries
-                    link_command += " /LIBPATH:" + " /LIBPATH:".join(library_dirs)
-                    link_command += " /OUT:" + c_library_filename  # output file name
-                    link_command += " " + " ".join(extra_link_postargs)
-                    print(link_command)
-                    os.system(link_command)
-                else:
-                    self.compiler.link_shared_lib(objects,
-                                                  lib_name,
-                                                  output_dir=self.build_clib,
-                                                  libraries=libraries,
-                                                  library_dirs=library_dirs,
-                                                  runtime_library_dirs=runtime_library_dirs,
-                                                  extra_preargs=extra_preargs + extra_link_preargs,
-                                                  extra_postargs=extra_link_postargs,
-                                                  debug=self.debug,
-                                                  build_temp=self.build_temp,
-                                                  target_lang=language)
-
-            """
-            Copy library to the destination path
-            """
-            copy_file(c_library_filename, dest_filename, verbose=self.verbose, dry_run=self.dry_run)
-            # TODO very brute way, need to refactor
-            if c_library_filename.endswith(".dll"):
-                copy_file(c_library_filename.replace(".dll", ".lib"),
-                          dest_filename, verbose=self.verbose, dry_run=self.dry_run)
-
-            log.info(f"DPNP: building {lib_name} library finished")
diff --git a/utils/command_build_cmake_clib.py b/utils/command_build_cmake_clib.py
deleted file mode 100644
index 0ef0e240bb1e..000000000000
--- a/utils/command_build_cmake_clib.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# -*- coding: utf-8 -*-
-# *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-"""
-Module to call cmake based procedure by build_cmake_clib command
-"""
-
-import os
-import sys
-import pathlib
-from setuptools.command import build_clib
-from distutils import log
-
-
-"""
-Detect platform
-"""
-IS_WIN = False
-IS_MAC = False
-IS_LIN = False
-
-if 'linux' in sys.platform:
-    IS_LIN = True
-elif sys.platform == 'darwin':
-    IS_MAC = True
-elif sys.platform in ['win32', 'cygwin']:
-    IS_WIN = True
-else:
-    raise EnvironmentError("DPNP cmake builder: " + sys.platform + " not supported")
-
-
-"""
-Detect external SYCL queue manager
-"""
-_dpctrl_include_dir = "No_sycl_queue_mgr_include_dir"
-_dpctrl_library_dir = "No_sycl_queue_mgr_library_dir"
-_dpctrl_exists = "OFF"
-try:
-    """
-    Detect external SYCL queue handling library
-    """
-    import dpctl
-
-    _dpctrl_include_dir = str(os.path.abspath(dpctl.get_include()))
-    _dpctrl_library_dir = str(os.path.abspath(os.path.join(dpctl.get_include(), "..")))
-    _dpctrl_exists = "ON"
-except ImportError:
-    """
-    Set local SYCL queue handler set by default in CmakeList.txt
-    """
-    pass
-
-"""
-Detect enabling DPNP backend tests
-"""
-_dpnp_backend_tests_enable = os.environ.get('DPNP_BACKEND_TESTS_ENABLE', None)
-
-
-"""
-CmakeList.txt based build_clib
-"""
-
-
-class custom_build_cmake_clib(build_clib.build_clib):
-    def run(self):
-        root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
-        log.info(f"Project directory is: {root_dir}")
-
-        backend_directory = os.path.join(root_dir, "dpnp", "backend")
-        install_directory = os.path.join(root_dir, "dpnp")
-
-        build_temp = pathlib.Path(self.build_temp)
-        build_temp.mkdir(parents=True, exist_ok=True)
-        abs_build_temp_path = str(os.path.abspath(build_temp))
-        log.info(f"build directory is: {abs_build_temp_path}")
-
-        config = "Debug" if self.debug else "Release"
-
-        cmake_generator = str()
-        enable_tests = "OFF"
-
-        if IS_WIN:
-            cmake_generator = "-GNinja"
-        if _dpnp_backend_tests_enable is not None:
-            enable_tests = "ON"
-
-        cmake_args = [
-            cmake_generator,
-            "-S" + backend_directory,
-            "-B" + abs_build_temp_path,
-            "-DCMAKE_BUILD_TYPE=" + config,
-            "-DDPNP_INSTALL_PREFIX=" + install_directory.replace(os.sep, "/"),  # adjust to cmake requirenments
-            "-DDPNP_INSTALL_STRUCTURED=OFF",
-            # "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + install_directory,
-            "-DDPNP_SYCL_QUEUE_MGR_ENABLE:BOOL=" + _dpctrl_exists,
-            "-DDPNP_QUEUEMGR_INCLUDE_DIR=" + _dpctrl_include_dir,
-            "-DDPNP_QUEUEMGR_LIB_DIR=" + _dpctrl_library_dir,
-            "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON",
-            "-DDPNP_BACKEND_TESTS:BOOL=" + enable_tests
-        ]
-
-        # didn't find how to add it inside cmake, that is why this is here
-        import multiprocessing
-        cpu_count = multiprocessing.cpu_count()
-        # possible that jobs count must be +-1 against CPUs count
-        jobs = "-j" + str(cpu_count)
-
-        self.spawn(["cmake"] + cmake_args + [backend_directory])
-        if not self.dry_run:
-            self.spawn(["cmake", "--build", abs_build_temp_path, jobs])
-            self.spawn(["cmake", "--install", abs_build_temp_path])
diff --git a/utils/command_clean.py b/utils/command_clean.py
deleted file mode 100644
index 785340aa4023..000000000000
--- a/utils/command_clean.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# -*- coding: utf-8 -*-
-# *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-import os
-from setuptools import Command
-from fnmatch import fnmatch
-from shutil import rmtree
-from distutils import log
-
-
-class source_clean(Command):
-    """
-    Command to clean all generated files in the project
-
-    Usage:
-        To run the command: python ./setup.py clean
-    """
-
-    description = "Clean up the project source tree"
-
-    CLEAN_ROOTDIRS = ['build', 'build_cython', 'cython_debug', 'Intel_NumPy.egg-info', 'doc/_build', 'CMakeFiles']
-    CLEAN_DIRS = ['__pycache__']
-    CLEAN_FILES = ['*.so', '*.pyc', '*.pyd', '*.dll', '*.lib', 'CMakeCache.txt']
-
-    user_options = []
-
-    def initialize_options(self):
-        pass
-
-    def finalize_options(self):
-        pass
-
-    def run(self):
-        root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-        log.info(f"DPNP: cleaning in {root_dir}")
-
-        # removing dirs from root_dir
-        for dir_mask in self.CLEAN_ROOTDIRS:
-            rdir = os.path.join(root_dir, dir_mask)
-            if os.path.isdir(rdir):
-                log.info(f"rm {rdir}")
-                rmtree(rdir)
-
-        for (dirpath, dirnames, filenames) in os.walk(root_dir):
-            # removing subdirs
-            for dir in dirnames:
-                for dir_mask in self.CLEAN_DIRS:
-                    if fnmatch(dir, dir_mask):
-                        rdir = os.path.join(dirpath, dir)
-                        log.info(f"rm {rdir}")
-                        rmtree(rdir)
-
-            # removing files
-            for file in filenames:
-                for file_mask in self.CLEAN_FILES:
-                    if fnmatch(file, file_mask):
-                        rfile = os.path.join(dirpath, file)
-                        log.info(f"rm {rfile}")
-                        os.remove(rfile)
-
-        log.info(f"DPNP: cleaning finished")
diff --git a/utils/command_style.py b/utils/command_style.py
deleted file mode 100644
index 4d5a48ed5683..000000000000
--- a/utils/command_style.py
+++ /dev/null
@@ -1,155 +0,0 @@
-# -*- coding: utf-8 -*-
-# *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-import os
-from setuptools import Command
-
-
-class source_style(Command):
-    """
-    Command to check and adjust code style
-
-    Usage:
-        To check style: python ./setup.py style
-        To fix style: python ./setup.py style -a
-
-    """
-
-    user_options = [
-        ('apply', 'a', 'Apply codestyle changes to sources.')
-    ]
-    description = "Code style check and apply (with -a)"
-    boolean_options = []
-
-    _result_marker = "Result:"
-    _project_directory_excluded = ['build', '.git']
-
-    _c_formatter = 'clang-format'
-    _c_formatter_install_msg = 'pip install clang'
-    _c_formatter_command_line = [_c_formatter, '-style=file']
-    _c_file_extensions = ['.h', '.c', '.hpp', '.cpp']
-
-    _py_checker = 'pycodestyle'
-    _py_formatter = 'autopep8'
-    _py_formatter_install_msg = 'pip install --upgrade autopep8\npip install --upgrade pycodestyle'
-    _py_checker_command_line = [_py_checker]
-    _py_formatter_command_line = [
-        _py_formatter,
-        '--in-place']
-    _py_file_extensions = ['.py', '.pyx', '.pxd', '.pxi']
-
-    def _get_file_list(self, path, search_extentions):
-        """ Return file list to be adjusted or checked
-
-        path - is the project base path
-        search_extentions - list of strings with files extension to search recurcivly
-        """
-        files = []
-        exluded_directories_full_path = [os.path.join(
-            path, excluded_dir) for excluded_dir in self._project_directory_excluded]
-
-        # r=root, d=directories, f = files
-        for r, d, f in os.walk(path):
-            # match exclude pattern in current directory
-            found = False
-            for excluded_dir in exluded_directories_full_path:
-                if r.find(excluded_dir) >= 0:
-                    found = True
-
-            if found:
-                continue
-
-            for file in f:
-                filename, extention = os.path.splitext(file)
-                if extention in search_extentions:
-                    files.append(os.path.join(r, file))
-
-        return files
-
-    def initialize_options(self):
-        self.apply = 0
-
-    def finalize_options(self):
-        pass
-
-    def run(self):
-        root_dir = os.path.join(os.path.dirname(__file__), "..")
-        print("Project directory is: %s" % root_dir)
-
-        if self.apply:
-            self._c_formatter_command_line += ['-i']
-        else:
-            self._c_formatter_command_line += ['-output-replacements-xml']
-
-        import subprocess
-
-        bad_style_file_names = []
-
-        # C files handling
-        c_files = self._get_file_list(root_dir, self._c_file_extensions)
-        try:
-            for f in c_files:
-                command_output = subprocess.Popen(
-                    self._c_formatter_command_line + [f], stdout=subprocess.PIPE)
-                command_cout, command_cerr = command_output.communicate()
-                if not self.apply:
-                    if command_cout.find(b'<replacement ') > 0:
-                        bad_style_file_names.append(f)
-        except BaseException as original_error:
-            print("%s is not installed.\nPlease use: %s" %
-                  (self._c_formatter, self._c_formatter_install_msg))
-            print("Original error message is:\n", original_error)
-            exit(1)
-
-        # Python files handling
-        py_files = self._get_file_list(root_dir, self._py_file_extensions)
-        try:
-            for f in py_files:
-                if not self.apply:
-                    command_output = subprocess.Popen(
-                        self._py_checker_command_line + [f])
-                    returncode = command_output.wait()
-                    if returncode != 0:
-                        bad_style_file_names.append(f)
-                else:
-                    command_output = subprocess.Popen(
-                        self._py_formatter_command_line + [f])
-                    command_output.wait()
-        except BaseException as original_error:
-            print("%s is not installed.\nPlease use: %s" %
-                  (self._py_formatter, self._py_formatter_install_msg))
-            print("Original error message is:\n", original_error)
-            exit(1)
-
-        if bad_style_file_names:
-            print("Following files style need to be adjusted:")
-            for line in bad_style_file_names:
-                print(line)
-            print("%s Style check failed" % self._result_marker)
-            exit(1)
-        else:
-            print("%s Style check passed" % self._result_marker)
-            exit(0)
diff --git a/utils/dpnp_build_utils.py b/utils/dpnp_build_utils.py
deleted file mode 100644
index 2ccf211587d3..000000000000
--- a/utils/dpnp_build_utils.py
+++ /dev/null
@@ -1,397 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-import os
-import sys
-
-
-IS_CONDA_BUILD = os.environ.get("CONDA_BUILD") == "1"
-
-
-def find_library(var_name, rel_header_paths, rel_lib_paths,
-                 rel_include_path="include", rel_libdir_path="lib", verbose=False):
-    """
-    Find specified libraries/headers in the directory from the environment variable.
-
-    Parameters
-    ----------
-    var_name : str
-        the name of the environment variable
-    rel_header_paths : list(str)
-        relative paths to required headers
-    rel_lib_paths : list(str)
-        relative paths to required libraries
-    rel_include_path : str
-        relative path to the include directory
-    rel_libdir_path : str
-        relative path to the library directory
-    verbose : bool
-        to print paths to include and library directories
-
-    Returns
-    -------
-    tuple(list(str), list(str))
-        path to include directory, path to library directory
-    """
-    root_dir = os.getenv(var_name)
-    if root_dir is None:
-        return [], []
-
-    include_find = os.path.join(root_dir, rel_include_path)
-    libpath_find = os.path.join(root_dir, rel_libdir_path)
-    required_headers = [os.path.join(include_find, rel_path) for rel_path in rel_header_paths]
-    required_libs = [os.path.join(libpath_find, rel_path) for rel_path in rel_lib_paths]
-
-    for required_file in required_headers + required_libs:
-        if not os.path.exists(required_file):
-            return [], []
-
-    if verbose:
-        msg_template = "DPNP: using ${} based library. include={}, libpath={}"
-        print(msg_template.format(var_name, include_find, libpath_find))
-
-    return [include_find], [libpath_find]
-
-
-def find_cmplr(verbose=False):
-    """
-    Find compiler.
-
-    Parameters
-    ----------
-    verbose : bool
-        to print paths to include and library directories
-
-    Returns
-    -------
-    tuple(list(str), list(str))
-        path to include directory, path to library directory
-    """
-
-    rel_header_paths = rel_lib_paths = []
-
-    # try to find library in specified directory from $DPCPPROOT
-    if 'linux' in sys.platform:
-        rel_include_path = os.path.join('linux', 'include')
-        rel_libdir_path = os.path.join('linux', 'lib')
-    elif sys.platform in ['win32', 'cygwin']:
-        rel_include_path = os.path.join('windows', 'include')
-        rel_libdir_path = os.path.join('windows', 'lib')
-    else:
-        raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-    cmplr_include, cmplr_libpath = find_library("DPCPPROOT", rel_header_paths, rel_lib_paths,
-                                                rel_include_path=rel_include_path,
-                                                rel_libdir_path=rel_libdir_path,
-                                                verbose=verbose)
-
-    # try to find library in specified directory from $ONEAPI_ROOT
-    if not cmplr_include or not cmplr_libpath:
-        if sys.platform in ['linux']:
-            rel_include_path = os.path.join('compiler', 'latest', 'linux', 'include')
-            rel_libdir_path = os.path.join('compiler', 'latest', 'linux', 'lib')
-        elif sys.platform in ['win32', 'cygwin']:
-            rel_include_path = os.path.join('compiler', 'latest', 'windows', 'include')
-            rel_libdir_path = os.path.join('compiler', 'latest', 'windows', 'lib')
-        else:
-            raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-        cmplr_include, cmplr_libpath = find_library("ONEAPI_ROOT", rel_header_paths, rel_lib_paths,
-                                                    rel_include_path=rel_include_path,
-                                                    rel_libdir_path=rel_libdir_path,
-                                                    verbose=verbose)
-
-    # try to find in Python environment
-    if not cmplr_include or not cmplr_libpath:
-        if sys.platform in ['linux']:
-            rel_include_path = os.path.join('include')
-            rel_libdir_path = os.path.join('lib')
-        elif sys.platform in ['win32', 'cygwin']:
-            rel_include_path = os.path.join('Library', 'include')
-            rel_libdir_path = os.path.join('Library', 'lib')
-        else:
-            raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-        conda_root_var = "PREFIX" if IS_CONDA_BUILD else "CONDA_PREFIX"
-
-        cmplr_include, cmplr_libpath = find_library(conda_root_var, rel_header_paths, rel_lib_paths,
-                                                    rel_include_path=rel_include_path,
-                                                    rel_libdir_path=rel_libdir_path,
-                                                    verbose=verbose)
-
-    if not cmplr_include or not cmplr_libpath:
-        raise EnvironmentError("DPNP: Unable to find compiler")
-
-    return cmplr_include, cmplr_libpath
-
-
-def find_dpl(verbose=False):
-    """
-    Find DPL.
-
-    Parameters
-    ----------
-    verbose : bool
-        to print paths to include and library directories
-
-    Returns
-    -------
-    tuple(list(str), list(str))
-        path to include directory, path to library directory
-    """
-
-    rel_header_paths = [os.path.join("oneapi", "dpl", "algorithm")]
-    rel_lib_paths = []
-    rel_libdir_path = ""
-
-    # try to find library in specified directory from $DPLROOT like a repository
-    rel_include_path = os.path.join('include')
-
-    dpl_include, dpl_libpath = find_library("DPLROOT", rel_header_paths, rel_lib_paths,
-                                            rel_include_path=rel_include_path,
-                                            rel_libdir_path=rel_libdir_path,
-                                            verbose=verbose)
-
-    # try to find library in specified directory from $DPLROOT
-    if not dpl_include or not dpl_libpath:
-        if 'linux' in sys.platform:
-            rel_include_path = os.path.join('linux', 'include')
-        elif sys.platform in ['win32', 'cygwin']:
-            rel_include_path = os.path.join('windows', 'include')
-        else:
-            raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-        dpl_include, dpl_libpath = find_library("DPLROOT", rel_header_paths, rel_lib_paths,
-                                                rel_include_path=rel_include_path,
-                                                rel_libdir_path=rel_libdir_path,
-                                                verbose=verbose)
-
-    # try to find library in specified directory from $ONEAPI_ROOT
-    if not dpl_include or not dpl_libpath:
-        if sys.platform in ['linux']:
-            rel_include_path = os.path.join('dpl', 'latest', 'linux', 'include')
-        elif sys.platform in ['win32', 'cygwin']:
-            rel_include_path = os.path.join('dpl', 'latest', 'windows', 'include')
-        else:
-            raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-        dpl_include, dpl_libpath = find_library("ONEAPI_ROOT", rel_header_paths, rel_lib_paths,
-                                                rel_include_path=rel_include_path,
-                                                rel_libdir_path=rel_libdir_path,
-                                                verbose=verbose)
-
-    # try to find in Python environment
-    if not dpl_include or not dpl_libpath:
-        if sys.platform in ['linux']:
-            rel_include_path = os.path.join('include')
-        elif sys.platform in ['win32', 'cygwin']:
-            rel_include_path = os.path.join('Library', 'include')
-        else:
-            raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-        conda_root_var = "PREFIX" if IS_CONDA_BUILD else "CONDA_PREFIX"
-
-        dpl_include, dpl_libpath = find_library(conda_root_var, rel_header_paths, rel_lib_paths,
-                                                rel_include_path=rel_include_path,
-                                                rel_libdir_path=rel_libdir_path,
-                                                verbose=verbose)
-
-    if not dpl_include or not dpl_libpath:
-        raise EnvironmentError("DPNP: Unable to find DPL")
-
-    return dpl_include, dpl_libpath
-
-
-def find_mathlib(verbose=False):
-    """
-    Find mathlib.
-
-    Parameters
-    ----------
-    verbose : bool
-        to print paths to include and library directories
-
-    Returns
-    -------
-    tuple(list(str), list(str))
-        path to include directory, path to library directory
-    """
-
-    if sys.platform in ['linux']:
-        rel_header_paths = [os.path.join("oneapi", "mkl.hpp")]
-        rel_lib_paths = ["libmkl_sycl.so"]
-    elif sys.platform in ['win32', 'cygwin']:
-        rel_header_paths = [os.path.join("oneapi", "mkl.hpp")]
-        rel_lib_paths = ["mkl_sycl_dll.lib"]
-    else:
-        raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-    # try to find library in specified directory from $MKLROOT
-    if sys.platform in ['linux']:
-        rel_include_path = os.path.join('linux', 'include')
-        rel_libdir_path = os.path.join('linux', 'lib')
-    elif sys.platform in ['win32', 'cygwin']:
-        rel_include_path = os.path.join('windows', 'include')
-        rel_libdir_path = os.path.join('windows', 'lib')
-    else:
-        raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-    mathlib_include, mathlib_path = find_library("MKLROOT", rel_header_paths, rel_lib_paths,
-                                                 rel_include_path=rel_include_path,
-                                                 rel_libdir_path=rel_libdir_path,
-                                                 verbose=verbose)
-
-    # try to find library in specified directory from $ONEAPI_ROOT
-    if not mathlib_include or not mathlib_path:
-        if sys.platform in ['linux']:
-            rel_include_path = os.path.join('mkl', 'latest', 'linux', 'include')
-            rel_libdir_path = os.path.join('mkl', 'latest', 'linux', 'lib')
-        elif sys.platform in ['win32', 'cygwin']:
-            rel_include_path = os.path.join('mkl', 'latest', 'windows', 'include')
-            rel_libdir_path = os.path.join('mkl', 'latest', 'windows', 'lib')
-        else:
-            raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-        mathlib_include, mathlib_path = find_library("ONEAPI_ROOT", rel_header_paths, rel_lib_paths,
-                                                     rel_include_path=rel_include_path,
-                                                     rel_libdir_path=rel_libdir_path,
-                                                     verbose=verbose)
-
-    # try to find in Python environment
-    if not mathlib_include or not mathlib_path:
-        if sys.platform in ['linux']:
-            rel_include_path = os.path.join('include')
-            rel_libdir_path = os.path.join('lib')
-        elif sys.platform in ['win32', 'cygwin']:
-            rel_include_path = os.path.join('Library', 'include')
-            rel_libdir_path = os.path.join('Library', 'lib')
-        else:
-            raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-        conda_root_var = "PREFIX" if IS_CONDA_BUILD else "CONDA_PREFIX"
-
-        mathlib_include, mathlib_path = find_library(conda_root_var, rel_header_paths, rel_lib_paths,
-                                                     rel_include_path=rel_include_path,
-                                                     rel_libdir_path=rel_libdir_path,
-                                                     verbose=verbose)
-
-    if not mathlib_include or not mathlib_path:
-        raise EnvironmentError("DPNP: Unable to find math library")
-
-    return mathlib_include, mathlib_path
-
-
-def _find_omp_in_dpcpp_root(verbose=False):
-    """
-    Find omp in dpcpp root using $DPCPPROOT.
-
-    Parameters
-    ----------
-    verbose : bool
-        to print paths to include and library directories
-
-    Returns
-    -------
-    tuple(list(str), list(str))
-        path to include directory, path to library directory
-    """
-    rel_header_paths = rel_lib_paths = []
-
-    if 'linux' in sys.platform:
-        rel_include_path = os.path.join('linux', 'compiler', 'include')
-        rel_libdir_path = os.path.join('linux', 'compiler', 'lib', 'intel64')
-    elif sys.platform in ['win32', 'cygwin']:
-        rel_include_path = os.path.join('windows', 'compiler', 'include')
-        rel_libdir_path = os.path.join('windows', 'compiler', 'lib', 'intel64_win')
-    else:
-        rel_include_path, rel_libdir_path = 'include', 'lib'
-
-    return find_library("DPCPPROOT", rel_header_paths, rel_lib_paths,
-                        rel_include_path=rel_include_path, rel_libdir_path=rel_libdir_path, verbose=verbose)
-
-
-def find_omp(verbose=False):
-    """
-    Find omp in environment.
-
-    Parameters
-    ----------
-    verbose : bool
-        to print paths to include and library directories
-
-    Returns
-    -------
-    tuple(list(str), list(str))
-        path to include directory, path to library directory
-    """
-    omp_include, omp_libpath = _find_omp_in_dpcpp_root(verbose=verbose)
-
-    if not omp_include or not omp_libpath:
-        raise EnvironmentError(f"DPNP: Unable to find omp. Please install Intel OneAPI environment")
-
-    return omp_include, omp_libpath
-
-
-def find_python_env(verbose=False):
-    """
-    Find Python environment.
-
-    Parameters
-    ----------
-    verbose : bool
-        to print paths to include and library directories
-
-    Returns
-    -------
-    tuple(list(str), list(str))
-        path to include directory, path to library directory
-    """
-
-    rel_header_paths = rel_lib_paths = []
-
-    if sys.platform in ['linux']:
-        rel_include_path = os.path.join('include')
-        rel_libdir_path = os.path.join('lib')
-    elif sys.platform in ['win32', 'cygwin']:
-        rel_include_path = os.path.join('Library', 'include')
-        rel_libdir_path = os.path.join('Library', 'lib')
-    else:
-        raise EnvironmentError("DPNP: " + sys.platform + " not supported")
-
-    conda_root_var = "PREFIX" if IS_CONDA_BUILD else "CONDA_PREFIX"
-
-    env_include, env_path = find_library(conda_root_var, rel_header_paths, rel_lib_paths,
-                                         rel_include_path=rel_include_path,
-                                         rel_libdir_path=rel_libdir_path,
-                                         verbose=verbose)
-
-    env_include += [os.path.join(os.getenv(conda_root_var), 'include')]
-
-    if not env_include or not env_path:
-        raise EnvironmentError(f"DPNP: Unable to find Python environment paths")
-
-    return env_include, env_path
diff --git a/utils/dpnp_coverage.py b/utils/dpnp_coverage.py
deleted file mode 100644
index 5959fe81b863..000000000000
--- a/utils/dpnp_coverage.py
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-import os
-import inspect
-
-name_dict = {}
-module_names_set = dict()
-extra_modules = ["fft", "linalg", "random", "char"]
-sep = ":"
-
-col0_width = 4
-col1_width = 40
-col2_width = 60
-
-
-def print_header_line():
-    print(f"{'='*col0_width}", end=sep)
-    print(f"{'='*col1_width}", end=sep)
-    for mod_name in module_names_set.keys():
-        print(f"{'='*col2_width}", end=sep)
-    print()
-
-
-def print_header():
-    print_header_line()
-
-    print("#".center(col0_width), end=sep)
-    print("Name".center(col1_width), end=sep)
-    for mod_name in module_names_set.keys():
-        print(mod_name.center(col2_width), end=sep)
-    print()
-
-    print_header_line()
-
-
-def print_footer():
-    print_header_line()
-
-    print("".center(col0_width), end=sep)
-    print("".center(col1_width), end=sep)
-    for mod_name, mod_sym_count in module_names_set.items():
-        count_str = mod_name + " total " + str(mod_sym_count)
-        print(count_str.rjust(col2_width), end=sep)
-    print()
-
-    print_header_line()
-
-
-def add_symbol(item_name, module_name, item_val):
-    if item_name not in name_dict.keys():
-        name_dict[item_name] = dict()
-    if not name_dict[item_name].get(module_name, False):
-        name_dict[item_name][module_name] = str(item_val)
-
-        if module_name not in module_names_set.keys():
-            module_names_set[module_name] = 0
-        else:
-            module_names_set[module_name] += 1
-#     else:
-#         print(f"item_name={item_name}, {name_dict[item_name][module_name]} replaced with {str(item_val)}")
-
-
-def fill_data(module_name, module_obj, parent_module_name=""):
-    for item_name_raw, item_val in inspect.getmembers(module_obj):
-        if (item_name_raw[0] == "_"):
-            continue
-
-        item_name = os.path.join(parent_module_name, item_name_raw)
-        if getattr(item_val, '__call__', False):
-            str_item = item_val
-            try:
-                str_item = inspect.signature(item_val)
-            except ValueError:
-                pass
-            add_symbol(item_name, module_name, str_item)
-        elif inspect.ismodule(item_val):
-            if item_name in extra_modules:
-                fill_data(module_name, item_val, parent_module_name=item_name)
-            else:
-                print(f"IGNORED: {module_name}: module: {item_name}")
-#         elif isinstance(item_val, (tuple, list, float, int)):
-#             add_symbol(item_name, module_name, item_val)
-#         elif isinstance(item_val, str):
-#             add_symbol(item_name, module_name, item_val.replace('\n', '').strip())
-#         else:
-#             add_symbol(item_name, module_name, type(item_val))
-#             print(f"Symbol {item_name} unrecognized. Symbol: {item_val}, type: {type(item_val)}")
-
-
-def print_data():
-    print_header()
-
-    symbol_id = 0
-    for symbol_name, symbol_values in sorted(name_dict.items()):
-        print(f"{symbol_id:<{col0_width}}", end=sep)
-        symbol_id += 1
-        print(f"{symbol_name:{col1_width}}", end=sep)
-
-        for mod_name in module_names_set.keys():
-            val = symbol_values.get(mod_name, "")
-            val_prn = str(val)[0:col2_width - 1]
-            print(f"{val_prn:{col2_width}}", end=sep)
-
-        print()
-
-    print_footer()
-
-
-if __name__ == '__main__':
-
-    try:
-        import dpnp
-        fill_data("DPNP", dpnp)
-    except ImportError:
-        print("No DPNP module loaded")
-
-    try:
-        import numpy
-        fill_data("NumPy", numpy)
-    except ImportError:
-        print("No NumPy module loaded")
-
-    try:
-        import cupy
-        fill_data("cuPy", cupy)
-    except ImportError:
-        print("No cuPy module loaded")
-
-    print_data()

From 68683d9b9c71222d0c467510791b52bfeeff5b85 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 11:18:06 -0500
Subject: [PATCH 010/129] Added docs and defaults for supported options

---
 CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5ff196915a8a..daf2c28be9b2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,9 @@ project(dpnp
   DESCRIPTION "NumPy-like API accelerated by SYCL."
 )
 
+option(DPNP_GENERATE_COVERAGE "Enable build DPNP with coverage instrumentation" FALSE)
+option(DPNP_BACKEND_TESTS "Enable building of DPNP backend test suite" FALSE)
+
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
 set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH)

From e12576c4207fe46edd333242bdefbaf6d9c53882 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 11:18:46 -0500
Subject: [PATCH 011/129] Include dpnp/backend/tests if DPNP_BACKEND_TESTS is
 set

---
 dpnp/backend/CMakeLists.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 744768aa158d..55263d6ea2e0 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -81,6 +81,11 @@ add_library(dpnp_backend_library INTERFACE IMPORTED GLOBAL)
 target_include_directories(dpnp_backend_library BEFORE INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/src)
 target_link_libraries(dpnp_backend_library INTERFACE ${_trgt})
 
+if(DPNP_BACKEND_TESTS)
+  add_subdirectory(tests)
+endif()
+
+
 install(
   TARGETS ${_trgt}
   LIBRARY DESTINATION dpnp

From 9626bb85c3b83e6df7cb85f27b7e1314f37c59b3 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 11:18:59 -0500
Subject: [PATCH 012/129] Ignore _skbuild/ folder

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 7ed68aab8567..8beb38f1efd6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 # CMake build and local install directory
-build
+_skbuild
 build_cython
 dpnp.egg-info
 

From 9796c481d8e95d6ead95fb90c9d095ebde29712f Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 11:19:42 -0500
Subject: [PATCH 013/129] Use interface library dpnp_backend_library instead of
 library target dpnp_backend_c in target_link_libraries for CTest

---
 dpnp/backend/tests/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/backend/tests/CMakeLists.txt b/dpnp/backend/tests/CMakeLists.txt
index e0a4936d02d3..8729b76a6845 100644
--- a/dpnp/backend/tests/CMakeLists.txt
+++ b/dpnp/backend/tests/CMakeLists.txt
@@ -51,7 +51,7 @@ add_executable(dpnpc_tests
                test_random.cpp
                test_utils.cpp
                test_utils_iterator.cpp)
-target_link_libraries(dpnpc_tests GTest::GTest GTest::Main pthread dpnp_backend_c)
+target_link_libraries(dpnpc_tests GTest::GTest GTest::Main pthread dpnp_backend_library)
 
 # TODO split
 add_test(dpnpc_tests dpnpc_tests)

From bb6d2b04bf5ed5353dc034ada2fae66153b33956 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 11:22:30 -0500
Subject: [PATCH 014/129] Need scikit-build in host environment

---
 conda-recipe/meta.yaml | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index 3f8db36ecc98..875742c1cf2a 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -10,18 +10,19 @@ requirements:
       - setuptools
       - numpy >=1.19,<1.25a0
       - cython
-      - cmake >=3.19
-      - dpctl >=0.14
+      - cmake >=3.21
+      - dpctl >=0.14.2
       - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.0.0') }}
       - onedpl-devel
       - tbb-devel
       - wheel
+      - scikit-build
     build:
       - {{ compiler('cxx') }}
       - {{ compiler('dpcpp') }}  >=2023.0  # [not osx]
     run:
       - python
-      - dpctl >=0.14
+      - dpctl >=0.14.2
       - {{ pin_compatible('dpcpp-cpp-rt', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('mkl-dpcpp', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('numpy', min_pin='x.x', max_pin='x') }}
@@ -30,11 +31,6 @@ build:
     number: {{ GIT_DESCRIBE_NUMBER }}
     include_recipe: False
     script_env:
-      - ONEAPI_ROOT
-      - DPCPPROOT
-      - MKLROOT
-      - TBBROOT
-      - DPLROOT
       - WHEELS_OUTPUT_FOLDER
 
 test:
@@ -42,9 +38,9 @@ test:
       - pytest
       - setuptools
     source_files:
-        - examples
-        - tests
-        - setup.cfg
+      - examples
+      - tests
+      - setup.cfg
     commands:
       - python -c "import dpnp"
       - pytest -s

From 98666742e2ad3bbcbc09540b2ae79599a4cac81c Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 11:23:01 -0500
Subject: [PATCH 015/129] Removing old CMakeLists.txt

---
 dpnp/backend/saved_old_CMakeLists.txt | 354 --------------------------
 1 file changed, 354 deletions(-)
 delete mode 100644 dpnp/backend/saved_old_CMakeLists.txt

diff --git a/dpnp/backend/saved_old_CMakeLists.txt b/dpnp/backend/saved_old_CMakeLists.txt
deleted file mode 100644
index 330eb1030f3d..000000000000
--- a/dpnp/backend/saved_old_CMakeLists.txt
+++ /dev/null
@@ -1,354 +0,0 @@
-# *****************************************************************************
-# Copyright (c) 2016-2023, Intel Corporation
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# - Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# - Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-# *****************************************************************************
-
-# cmake-format -i CMakeLists.txt --line-width=120
-
-cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
-
-# set(DPNP_VERSION 0.11.1)
-# set(DPNP_API_VERSION 0.11)
-
-# set directory where the custom finders live
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules")
-
-# -----------------------------------------------------------------------------------------------
-# Project build options...
-# -----------------------------------------------------------------------------------------------
-if(DEFINED ENV{ONEAPI_ROOT})
-  set(DPNP_ONEAPI_ROOT "$ENV{ONEAPI_ROOT}" CACHE PATH "Folder contains oneapi tool set")
-endif()
-
-option(DPNP_STATIC_LIB_ENABLE "Enable build DPNP static library" FALSE)
-option(DPNP_DEBUG_ENABLE "Enable output for DPNP_DEBUG statements" FALSE)
-option(DPNP_INSTALL_STRUCTURED "if FALSE, install package files into same directory" TRUE)
-option(DPNP_SYCL_QUEUE_MGR_ENABLE "Use external manager for SYCL queue" FALSE)
-option(DPNP_BACKEND_TESTS "Enable DPNP tests" FALSE)
-
-if(DEFINED ENV{DPNP_DEBUG})
-  set(DPNP_DEBUG_ENABLE $ENV{DPNP_DEBUG})
-endif()
-
-message(STATUS "CMAKE_VERSION:                   ${CMAKE_VERSION}")
-message(STATUS "CMAKE_GENERATOR:                 ${CMAKE_GENERATOR}")
-message(STATUS "CMAKE_HOST_SYSTEM_NAME:          ${CMAKE_HOST_SYSTEM_NAME}")
-message(STATUS "========== User controlled variables list ==========")
-message(STATUS "DPNP_ONEAPI_ROOT:                ${DPNP_ONEAPI_ROOT}")
-message(STATUS "DPNP_STATIC_LIB_ENABLE:          ${DPNP_STATIC_LIB_ENABLE}")
-message(STATUS "DPNP_DEBUG_ENABLE:               ${DPNP_DEBUG_ENABLE}")
-message(STATUS "DPNP_BACKEND_TESTS:              ${DPNP_BACKEND_TESTS}")
-message(STATUS "DPNP_INSTALL_STRUCTURED:         ${DPNP_INSTALL_STRUCTURED}")
-message(STATUS "DPNP_SYCL_QUEUE_MGR_ENABLE:      ${DPNP_SYCL_QUEUE_MGR_ENABLE}")
-message(STATUS " |- DPNP_QUEUEMGR_INCLUDE_DIR:   ${DPNP_QUEUEMGR_INCLUDE_DIR}")
-message(STATUS " |- DPNP_QUEUEMGR_LIB_DIR:       ${DPNP_QUEUEMGR_LIB_DIR}")
-message(STATUS "======= End of user controlled variables list ======")
-
-# -----------------------------------------------------------------------------------------------
-# Compiler-specific logic...
-# -----------------------------------------------------------------------------------------------
-
-# cmake 3.19.1 has a bug in dpcpp compiler detection. Let's assume it is a clang
-# set(CMAKE_CXX_COMPILER_ID "Clang")
-# set(CMAKE_CXX_COMPILER_VERSION 12.0)
-if (CMAKE_VERSION VERSION_EQUAL 3.19.1)
-    message(FATAL_ERROR
-      " Unsupported cmake version ${CMAKE_VERSION}\n"
-      " Please use other cmake version, for example:\n"
-      "in Linux:\n"
-      " curl --output cmake_webimage.tar.gz --url https://cmake.org/files/v3.19/cmake-3.19.2-Linux-x86_64.tar.gz --retry 5 --retry-delay 5\n"
-      " tar -xzf cmake_webimage.tar.gz\n"
-      " rm -f cmake_webimage.tar.gz\n"
-      " export PATH=`pwd`/cmake-3.19.2-Linux-x86_64/bin:$PATH\n"
-      "in Windows:\n"
-      " curl.exe --output cmake_webimage.zip --url https://cmake.org/files/v3.19/cmake-3.19.2-win64-x64.zip --retry 5 --retry-delay 5\n"
-      " tar -xf cmake_webimage.zip\n"
-      " del cmake_webimage.zip\n"
-      " set PATH=%CD%\\cmake-3.19.2-win64-x64\\bin;%PATH%\n"
-        )
-endif()
-
-# SYCL related compile options
-string(CONCAT COMMON_COMPILE_FLAGS
-  "-fsycl "
-  "-fno-approx-func "
-  "-fno-finite-math-only "
-)
-string(CONCAT COMMON_LINK_FLAGS
-  "-fsycl "
-  "-fsycl-device-code-split=per_kernel "
-)
-if(UNIX)
-  set(CMAKE_CXX_COMPILER "icpx")
-  # add_compile_options(-fPIC)
-elseif(WIN32)
-  set(CMAKE_CXX_COMPILER "icx")
-  # set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld-link")
-  # set(CMAKE_LINKER "lld-link")
-  # include (Platform/Windows-Clang)
-  # set(CMAKE_LINKER "dpcpp")
-  # set(CMAKE_AR "llvm-ar")
-  # set(CMAKE_RANLIB "llvm-ranlib")
-  # set(CMAKE_CXX_FLAGS "/EHsc")
-
-  string(APPEND COMMON_COMPILE_FLAGS
-    "/EHsc "
-#    "/Ox "
-#    "/W3 "
-#    "/GL "
-#    "/DNDEBUG "
-#    "/MD "
-  )
-else()
-  message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in compiler selection case")
-endif()
-
-# set language version
-set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
-
-# warning flag set
-string(CONCAT DPNP_WARNING_FLAGS
-  "-W "
-  "-Wextra "
-  "-Wshadow "
-  "-Wall "
-  "-Wstrict-prototypes "
-  "-Wformat "
-  "-Wformat-security "
-)
-string(APPEND COMMON_COMPILE_FLAGS
-  "${DPNP_WARNING_FLAGS}"
-)
-
-# debug/release compile definitions
-if(DPNP_DEBUG_ENABLE)
-  set(CMAKE_BUILD_TYPE "Debug")
-  string(APPEND COMMON_COMPILE_FLAGS
-    "-O0 "
-    "-ggdb3 "
-  )
-  string(APPEND COMMON_LINK_FLAGS
-    "-O0 "
-    "-ggdb3 "
-    "-fsycl-link-huge-device-code "
-  )
-else()
-  set(CMAKE_BUILD_TYPE "Release")
-  string(APPEND COMMON_COMPILE_FLAGS
-    "-O3 "
-  )
-endif()
-
-# -----------------------------------------------------------------------------------------------
-# Auxilary building options...
-# -----------------------------------------------------------------------------------------------
-# sdl
-string(CONCAT DPNP_DEFS
-  "-D_FORTIFY_SOURCE=2 "
-)
-if(NOT WIN32)
-  string(APPEND COMMON_COMPILE_FLAGS
-    "-fno-delete-null-pointer-checks "
-    "-fstack-protector-strong "
-    "-fno-strict-overflow "
-    "-fwrapv "
-    )
-  string(APPEND COMMON_LINK_FLAGS
-    "LINKER:-z,noexecstack,-z,relro,-z,now "
-  )
-endif()
-
-# disable PSTL policies due to compiler bug
-string(APPEND DPNP_DEFS
-  "-DPSTL_USE_PARALLEL_POLICIES=0 "
-  "-D_GLIBCXX_USE_TBB_PAR_BACKEND=0 "
-)
-
-# disable PSTL predefined policies objects (global queues, prevent fail on Windows)
-string(APPEND DPNP_DEFS
-  "-DONEDPL_USE_PREDEFINED_POLICIES=0 "
-)
-
-# -----------------------------------------------------------------------------------------------
-# Create project...
-# -----------------------------------------------------------------------------------------------
-# set(CMAKE_CXX_COMPILER "clang++")
-project(dpnp_project
-        # VERSION ${DPNP_VERSION}
-        DESCRIPTION "DPNP: NumPy-like API accelerated with SYCL"
-        HOMEPAGE_URL https://github.com/IntelPython/dpnp
-        LANGUAGES CXX)
-# set(CMAKE_CXX_COMPILER "dpcpp")
-
-# -----------------------------------------------------------------------------------------------
-# Building logic...
-# -----------------------------------------------------------------------------------------------
-set(DPNP_SRC
-    kernels/dpnp_krnl_arraycreation.cpp
-    kernels/dpnp_krnl_bitwise.cpp
-    kernels/dpnp_krnl_common.cpp
-    kernels/dpnp_krnl_elemwise.cpp
-    kernels/dpnp_krnl_fft.cpp
-    kernels/dpnp_krnl_indexing.cpp
-    kernels/dpnp_krnl_linalg.cpp
-    kernels/dpnp_krnl_logic.cpp
-    kernels/dpnp_krnl_manipulation.cpp
-    kernels/dpnp_krnl_mathematical.cpp
-    kernels/dpnp_krnl_random.cpp
-    kernels/dpnp_krnl_reduction.cpp
-    kernels/dpnp_krnl_searching.cpp
-    kernels/dpnp_krnl_sorting.cpp
-    kernels/dpnp_krnl_statistics.cpp
-    src/constants.cpp
-    src/dpnp_iface_fptr.cpp
-    src/memory_sycl.cpp
-    src/queue_sycl.cpp
-    src/verbose.cpp
-    src/dpnp_random_state.cpp
-    )
-
-if(DPNP_STATIC_LIB_ENABLE)
-  add_library(dpnp_backend_c STATIC ${DPNP_SRC})
-else()
-  add_library(dpnp_backend_c SHARED ${DPNP_SRC})
-  set_target_properties(dpnp_backend_c PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON)
-endif()
-
-target_include_directories(dpnp_backend_c PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(dpnp_backend_c PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
-
-string(REPLACE " " ";" COMMON_COMPILE_FLAGS_AS_LIST ${COMMON_COMPILE_FLAGS})
-target_compile_options(dpnp_backend_c PUBLIC ${COMMON_COMPILE_FLAGS_AS_LIST})
-string(REPLACE " " ";" DPNP_DEFS_AS_LIST ${DPNP_DEFS})
-target_compile_definitions(dpnp_backend_c PUBLIC ${DPNP_DEFS_AS_LIST})
-string(REPLACE " " ";" COMMON_LINK_FLAGS_AS_LIST ${COMMON_LINK_FLAGS})
-target_link_options(dpnp_backend_c PUBLIC ${COMMON_LINK_FLAGS_AS_LIST})
-
-
-# -----------------------------------------------------------------------------------------------
-# Testing logic...
-# -----------------------------------------------------------------------------------------------
-if(DPNP_BACKEND_TESTS)
-  add_subdirectory(tests)
-endif()
-
-# -----------------------------------------------------------------------------------------------
-# Dependencies logic...
-# -----------------------------------------------------------------------------------------------
-# Math library
-find_package(MathLib REQUIRED)
-target_compile_definitions(dpnp_backend_c PUBLIC -DMKL_ILP64=1)
-target_include_directories(dpnp_backend_c PUBLIC ${MATHLIB_INCLUDE_DIR})
-
-link_directories(dpnp_backend_c PUBLIC ${MATHLIB_LIBRARY_DIR}) # does not work with some cmake versions
-target_link_directories(dpnp_backend_c PUBLIC ${MATHLIB_LIBRARY_DIR}) # duplicate link_directories
-
-if(UNIX)
-# Link Line Advisor v6.13
-# -DMKL_ILP64 -I"${MKLROOT}/include"
-# -lmkl_sycl -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl
-  set(DPNP_MATHLIB_DEP_LIBS
-      mkl_sycl
-      mkl_intel_ilp64
-      mkl_tbb_thread # mkl_sequential
-      mkl_core
-      sycl
-      OpenCL
-      pthread
-      m
-      dl
-      CACHE STRING "Set of libraries to link")
-elseif(WIN32)
-# Link Line Advisor v6.13
-# -DMKL_ILP64 -I"%MKLROOT%\include"
-# mkl_sycl_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib sycl.lib OpenCL.lib
-  set(DPNP_MATHLIB_DEP_LIBS
-      mkl_sycl_dll
-      mkl_intel_ilp64_dll
-      mkl_tbb_thread_dll # mkl_sequential_dll
-      mkl_core_dll
-      sycl
-      OpenCL
-      CACHE STRING "Set of libraries to link")
-else()
-  message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in MathLib libraries set")
-endif()
-
-target_link_libraries(dpnp_backend_c PUBLIC ${DPNP_MATHLIB_DEP_LIBS})
-
-# Parallel STL
-find_package(DPL REQUIRED)
-target_include_directories(dpnp_backend_c PUBLIC ${DPL_INCLUDE_DIR})
-
-# SYCL queue manager
-if(DPNP_SYCL_QUEUE_MGR_ENABLE)
-    target_include_directories(dpnp_backend_c PUBLIC ${DPNP_QUEUEMGR_INCLUDE_DIR})
-    target_link_directories(dpnp_backend_c PUBLIC ${DPNP_QUEUEMGR_LIB_DIR})
-    target_link_libraries(dpnp_backend_c PUBLIC "DPCTLSyclInterface")
-
-    # not sure but add runpath
-    set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${DPNP_QUEUEMGR_LIB_DIR}")
-
-    # disable stripping rpath in installation logic
-    set_target_properties(dpnp_backend_c PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
-else()
-    target_compiler_definitions(dpnp_backend_c PUBLIC -DDPNP_LOCAL_QUEUE=1)
-endif()
-
-# -----------------------------------------------------------------------------------------------
-# Installation logic...
-# -----------------------------------------------------------------------------------------------
-set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/package_dpnp)
-
-if(DEFINED DPNP_INSTALL_PREFIX)
-  set(CMAKE_INSTALL_PREFIX ${DPNP_INSTALL_PREFIX})
-endif()
-
-if(NOT DPNP_INSTALL_STRUCTURED)
-  set(CMAKE_INSTALL_BINDIR ${CMAKE_INSTALL_PREFIX})
-  set(CMAKE_INSTALL_LIBDIR ${CMAKE_INSTALL_PREFIX})
-  set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX})
-endif()
-
-# set_target_properties(dpnp_backend_c PROPERTIES VERSION ${DPNP_VERSION} SOVERSION ${DPNP_API_VERSION})
-
-install(TARGETS dpnp_backend_c
-        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
-        )
-
-# -----------------------------------------------------------------------------------------------
-# Print Global Options
-# -----------------------------------------------------------------------------------------------
-message(STATUS "CMAKE_SYSTEM:                    ${CMAKE_SYSTEM}")
-message(STATUS "CMAKE_SYSTEM_VERSION:            ${CMAKE_SYSTEM_VERSION}")
-message(STATUS "CMAKE_SYSTEM_PROCESSOR:          ${CMAKE_SYSTEM_PROCESSOR}")
-message(STATUS "CMAKE_BUILD_TYPE:                ${CMAKE_BUILD_TYPE}")
-message(STATUS "CXX_STANDARD:                    ${CMAKE_CXX_STANDARD}")
-message(STATUS "CMAKE_CXX_COMPILER_ID:           ${CMAKE_CXX_COMPILER_ID}")
-message(STATUS "CMAKE_CXX_COMPILER_VERSION:      ${CMAKE_CXX_COMPILER_VERSION}")
-message(STATUS "CMAKE_CXX_COMPILER:              ${CMAKE_CXX_COMPILER}")
-message(STATUS "CMAKE_LINKER:                    ${CMAKE_LINKER}")
-message(STATUS "CMAKE_SOURCE_DIR:                ${CMAKE_SOURCE_DIR}")
-message(STATUS "DPNP_INSTALL_PREFIX:             ${CMAKE_INSTALL_PREFIX}")
-message(STATUS "CMAKE_VERBOSE_MAKEFILE:          ${CMAKE_VERBOSE_MAKEFILE}")

From 69a54b123d3e82fca642529277c35d15409fc334 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 11:35:05 -0500
Subject: [PATCH 016/129] Updated conda-recipe build scripts per changes in
 build system

---
 .github/workflows/Windows-IntelLLVM.cmake | 24 +++++++++++++
 conda-recipe/bld.bat                      | 43 ++++++++++++++++++++---
 conda-recipe/build.sh                     | 37 +++----------------
 conda-recipe/meta.yaml                    |  2 ++
 4 files changed, 69 insertions(+), 37 deletions(-)
 create mode 100755 .github/workflows/Windows-IntelLLVM.cmake

diff --git a/.github/workflows/Windows-IntelLLVM.cmake b/.github/workflows/Windows-IntelLLVM.cmake
new file mode 100755
index 000000000000..603fe243b520
--- /dev/null
+++ b/.github/workflows/Windows-IntelLLVM.cmake
@@ -0,0 +1,24 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+
+# This module is shared by multiple languages; use include blocker.
+if(__WINDOWS_INTEL)
+  return()
+endif()
+set(__WINDOWS_INTEL 1)
+
+include(Platform/Windows-MSVC)
+macro(__windows_compiler_intel lang)
+  __windows_compiler_msvc(${lang})
+
+  set(CMAKE_${lang}_LINK_EXECUTABLE    "<CMAKE_${lang}_COMPILER> ${CMAKE_CL_NOLOGO} <CMAKE_${lang}_LINK_FLAGS> <OBJECTS> ${CMAKE_START_TEMP_FILE} -link -out:<TARGET> -implib:<TARGET_IMPLIB> -pdb:<TARGET_PDB> -version:<TARGET_VERSION_MAJOR>.<TARGET_VERSION_MINOR>${_PLATFORM_LINK_FLAGS} <LINK_FLAGS> <LINK_LIBRARIES>${CMAKE_END_TEMP_FILE}")
+  set(CMAKE_${lang}_CREATE_SHARED_LIBRARY    "<CMAKE_${lang}_COMPILER> ${CMAKE_CL_NOLOGO} <CMAKE_${lang}_LINK_FLAGS> <OBJECTS> ${CMAKE_START_TEMP_FILE} -LD -link -out:<TARGET> -implib:<TARGET_IMPLIB> -pdb:<TARGET_PDB> -version:<TARGET_VERSION_MAJOR>.<TARGET_VERSION_MINOR>${_PLATFORM_LINK_FLAGS} <LINK_FLAGS> <LINK_LIBRARIES> ${CMAKE_END_TEMP_FILE}")
+  set(CMAKE_${lang}_CREATE_SHARED_MODULE ${CMAKE_${lang}_CREATE_SHARED_LIBRARY})
+  if (NOT "${lang}" STREQUAL "Fortran")    # Fortran driver does not support -fuse-ld, yet
+    set(CMAKE_${lang}_CREATE_STATIC_LIBRARY      "<CMAKE_${lang}_COMPILER> ${CMAKE_CL_NOLOGO} <CMAKE_${lang}_LINK_FLAGS> <OBJECTS> ${CMAKE_START_TEMP_FILE} -fuse-ld=llvm-lib -o <TARGET> -link <LINK_FLAGS> <LINK_LIBRARIES> ${CMAKE_END_TEMP_FILE}")
+  endif()
+  set(CMAKE_DEPFILE_FLAGS_${lang} "-QMMD -QMT <DEP_TARGET> -QMF <DEP_FILE>")
+  set(CMAKE_${lang}_DEPFILE_FORMAT gcc)
+
+endmacro()
diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat
index 8ec6c1fb1587..99ba8b17f019 100644
--- a/conda-recipe/bld.bat
+++ b/conda-recipe/bld.bat
@@ -9,10 +9,43 @@ REM @TODO: remove the setting, once transition to build backend on Windows
 REM to cmake is complete.
 SET "SETUPTOOLS_USE_DISTUTILS=stdlib"
 
-IF DEFINED DPLROOT (
-    ECHO "Sourcing DPLROOT"
-    SET "INCLUDE=%DPLROOT%\include;%INCLUDE%"
+"%PYTHON%" setup.py clean --all
+set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
+
+FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16) DO @(
+  REM set DIR_HINT if directory exists
+  IF EXIST "%BUILD_PREFIX%\Library\lib\clang\%%V\" (
+     SET "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\%%V"
+  )
+)
+
+set "PLATFORM_DIR=%PREFIX%\Library\share\cmake-3.22\Modules\Platform"
+set "FN=Windows-IntelLLVM.cmake"
+
+rem Save the original file, and copy patched file to
+rem fix the issue with IntelLLVM integration with cmake on Windows
+if EXIST "%PLATFORM_DIR%" (
+  dir "%PLATFORM_DIR%\%FN%"
+  copy /Y "%PLATFORM_DIR%\%FN%" .
+  if errorlevel 1 exit 1
+  copy /Y .github\workflows\Windows-IntelLLVM.cmake "%PLATFORM_DIR%"
+  if errorlevel 1 exit 1
 )
 
-%PYTHON% setup.py build_clib
-%PYTHON% setup.py build_ext install
+if NOT "%WHEELS_OUTPUT_FOLDER%"=="" (
+    rem Install and assemble the dpnp wheel package from the build bits
+    "%PYTHON%" setup.py install bdist_wheel %SKBUILD_ARGS%
+    if errorlevel 1 exit 1
+    copy dist\dpnp*.whl %WHEELS_OUTPUT_FOLDER%
+    if errorlevel 1 exit 1
+) ELSE (
+    rem Only install
+    "%PYTHON%" setup.py install %SKBUILD_ARGS%
+    if errorlevel 1 exit 1
+)
+
+rem copy back
+if EXIST "%PLATFORM_DIR%" (
+   copy /Y "%FN%" "%PLATFORM_DIR%"
+   if errorlevel 1 exit 1
+)
diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh
index 164ad09d578f..b87f58edc00b 100644
--- a/conda-recipe/build.sh
+++ b/conda-recipe/build.sh
@@ -1,41 +1,12 @@
 #!/bin/bash
 
-# if ONEAPI_ROOT is specified (use all from it)
-if [ -n "${ONEAPI_ROOT}" ]; then
-    export DPCPPROOT=${ONEAPI_ROOT}/compiler/latest
-    export MKLROOT=${ONEAPI_ROOT}/mkl/latest
-    export TBBROOT=${ONEAPI_ROOT}/tbb/latest
-    export DPLROOT=${ONEAPI_ROOT}/dpl/latest
-fi
-
-# if DPCPPROOT is specified (work with custom DPCPP)
-if [ -n "${DPCPPROOT}" ]; then
-    . ${DPCPPROOT}/env/vars.sh
-fi
-
-# if MKLROOT is specified (work with custom math library)
-if [ -n "${MKLROOT}" ]; then
-    . ${MKLROOT}/env/vars.sh
-    # conda remove mkl --force -y || true
-fi
-
-# have to activate while SYCL CPU device/driver needs paths
-# if TBBROOT is specified
-if [ -n "${TBBROOT}" ]; then
-    . ${TBBROOT}/env/vars.sh
-fi
-
-# Set RPATH for wheels
-export CFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $CFLAGS"
-export LDFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $LDFLAGS"
-
 # Intel LLVM must cooperate with compiler and sysroot from conda
 echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg
 export ICPXCFG="$(pwd)/icpx_for_conda.cfg"
 export ICXCFG="$(pwd)/icpx_for_conda.cfg"
 
-$PYTHON setup.py build_clib
-$PYTHON setup.py build_ext install
+export CMAKE_GENERATOR="Ninja"
+SKBUILD_ARGS="-- -DDPCTL_MODULE_PATH=$($PYTHON -m dpctl --cmakedir) -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
 
 # Build wheel package
 if [ "$CONDA_PY" == "36" ]; then
@@ -44,6 +15,8 @@ else
     WHEELS_BUILD_ARGS="-p manylinux2014_x86_64"
 fi
 if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then
-    $PYTHON setup.py bdist_wheel ${WHEELS_BUILD_ARGS}
+    $PYTHON setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS}
     cp dist/dpnp*.whl ${WHEELS_OUTPUT_FOLDER}
+else
+    $PYTHON setup.py install ${SKBUILD_ARGS}
 fi
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index 875742c1cf2a..bef7e00618c0 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -11,6 +11,8 @@ requirements:
       - numpy >=1.19,<1.25a0
       - cython
       - cmake >=3.21
+      - ninja
+      - git
       - dpctl >=0.14.2
       - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.0.0') }}
       - onedpl-devel

From 18d8d39a20e9edf33b5f9344bb27323fa76894d4 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 11:57:32 -0500
Subject: [PATCH 017/129] Vendored oneDPLConfig.cmake, provide HINTS in
 find_package(oneDPL)

---
 CMakeLists.txt                                |  2 +-
 dpnp/backend/cmake/Modules/README.md          |  5 +
 dpnp/backend/cmake/Modules/oneDPLConfig.cmake | 96 +++++++++++++++++++
 3 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 dpnp/backend/cmake/Modules/README.md
 create mode 100755 dpnp/backend/cmake/Modules/oneDPLConfig.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index daf2c28be9b2..b7ab4ca53cc4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,7 +36,7 @@ set(MKL_INTERFACE_FULL "intel_ilp64")
 set(MKL_THREADING "tbb_thread")
 find_package(MKL REQUIRED)
 
-find_package(oneDPL REQUIRED)
+find_package(oneDPL REQUIRED HINTS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules)
 
 include(GNUInstallDirs)
 
diff --git a/dpnp/backend/cmake/Modules/README.md b/dpnp/backend/cmake/Modules/README.md
new file mode 100644
index 000000000000..b2d9bc9b50f3
--- /dev/null
+++ b/dpnp/backend/cmake/Modules/README.md
@@ -0,0 +1,5 @@
+# oneAPI DPL cmake script vendored from Intel oneAPI BaseKit 2023.0.0
+
+This is done to work around absence of this script in onedpl-devel conda
+package. Once it is added, expected 2023.2.0, this vendored package is
+to be removed.
diff --git a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
new file mode 100755
index 000000000000..a45a402a299e
--- /dev/null
+++ b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
@@ -0,0 +1,96 @@
+##===----------------------------------------------------------------------===##
+#
+# Copyright (C) Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# This file incorporates work covered by the following copyright and permission
+# notice:
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+#
+##===----------------------------------------------------------------------===##
+
+# Installation path: <onedpl_root>/lib/cmake/oneDPL/
+get_filename_component(_onedpl_root "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
+get_filename_component(_onedpl_root "${_onedpl_root}/../../../" ABSOLUTE)
+
+if (WIN32)
+    set(_onedpl_headers_subdir windows)
+else()
+    set(_onedpl_headers_subdir linux)
+endif()
+
+get_filename_component(_onedpl_headers "${_onedpl_root}/${_onedpl_headers_subdir}/include" ABSOLUTE)
+
+if (EXISTS "${_onedpl_headers}")
+    if (NOT TARGET oneDPL)
+        include(CheckCXXCompilerFlag)
+
+        add_library(oneDPL INTERFACE IMPORTED)
+        set_target_properties(oneDPL PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${_onedpl_headers}")
+
+        if (ONEDPL_PAR_BACKEND AND NOT ONEDPL_PAR_BACKEND MATCHES "^(tbb|openmp|serial)$")
+            message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND} is requested, but not supported, available backends: tbb, openmp, serial")
+            set(oneDPL_FOUND FALSE)
+            return()
+        endif()
+
+        if (NOT ONEDPL_PAR_BACKEND OR ONEDPL_PAR_BACKEND STREQUAL "tbb")  # Handle oneTBB backend
+            find_package(TBB 2021 QUIET COMPONENTS tbb)
+            if (NOT TBB_FOUND AND ONEDPL_PAR_BACKEND STREQUAL "tbb")  # If oneTBB backend is requested explicitly, but not found.
+                message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND} requested, but not found")
+                set(oneDPL_FOUND FALSE)
+                return()
+            elseif (TBB_FOUND)
+                set(ONEDPL_PAR_BACKEND tbb)
+                message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND}, disable OpenMP backend")
+                set_target_properties(oneDPL PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbb)
+                set_property(TARGET oneDPL APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS ONEDPL_USE_TBB_BACKEND=1 ONEDPL_USE_OPENMP_BACKEND=0)
+            endif()
+        endif()
+
+        if (NOT ONEDPL_PAR_BACKEND OR ONEDPL_PAR_BACKEND STREQUAL "openmp")  # Handle OpenMP backend
+            if (UNIX)
+                set(_openmp_flag "-fopenmp")
+            else()
+                set(_openmp_flag "-Qopenmp")
+            endif()
+
+            # Some compilers may fail if _openmp_flag is not in CMAKE_REQUIRED_LIBRARIES.
+            set(_onedpl_saved_required_libs ${CMAKE_REQUIRED_LIBRARIES})
+            set(CMAKE_REQUIRED_LIBRARIES ${_openmp_option})
+            check_cxx_compiler_flag(${_openmp_flag} _openmp_option)
+            set(CMAKE_REQUIRED_LIBRARIES ${_onedpl_saved_required_libs})
+            unset(_onedpl_saved_required_libs)
+
+            if (NOT _openmp_option AND ONEDPL_PAR_BACKEND STREQUAL "openmp")  # If OpenMP backend is requested explicitly, but not supported.
+                message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND} requested, but not supported")
+                set(oneDPL_FOUND FALSE)
+                return()
+            elseif (_openmp_option)
+                set(ONEDPL_PAR_BACKEND openmp)
+                message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND}, disable oneTBB backend")
+                set_target_properties(oneDPL PROPERTIES INTERFACE_COMPILE_OPTIONS ${_openmp_flag})
+                set_target_properties(oneDPL PROPERTIES INTERFACE_LINK_LIBRARIES ${_openmp_flag})
+                set_property(TARGET oneDPL APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS ONEDPL_USE_TBB_BACKEND=0 ONEDPL_USE_OPENMP_BACKEND=1)
+            endif()
+        endif()
+
+        if (NOT ONEDPL_PAR_BACKEND OR ONEDPL_PAR_BACKEND STREQUAL "serial")
+            set(ONEDPL_PAR_BACKEND serial)
+            message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND}, disable oneTBB and OpenMP backends")
+            set_property(TARGET oneDPL APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS ONEDPL_USE_TBB_BACKEND=0 ONEDPL_USE_OPENMP_BACKEND=0)
+        endif()
+
+        check_cxx_compiler_flag("-fsycl" _fsycl_option)
+        if (NOT _fsycl_option)
+            message(STATUS "oneDPL: -fsycl is not supported by current compiler, set ONEDPL_USE_DPCPP_BACKEND=0")
+            set_property(TARGET oneDPL APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS ONEDPL_USE_DPCPP_BACKEND=0)
+        endif()
+    endif()
+else()
+    message(STATUS "oneDPL: headers do not exist ${_onedpl_headers}")
+    set(oneDPL_FOUND FALSE)
+endif()

From 327e8610e6201550b3f6adf08c24dfbefb79587a Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 16:17:47 -0500
Subject: [PATCH 018/129] Fixed build.sh to specify install command

---
 conda-recipe/build.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh
index b87f58edc00b..b2185fb37659 100644
--- a/conda-recipe/build.sh
+++ b/conda-recipe/build.sh
@@ -15,8 +15,8 @@ else
     WHEELS_BUILD_ARGS="-p manylinux2014_x86_64"
 fi
 if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then
-    $PYTHON setup.py bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS}
+    $PYTHON setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS}
     cp dist/dpnp*.whl ${WHEELS_OUTPUT_FOLDER}
 else
-    $PYTHON setup.py ${SKBUILD_ARGS}
+    $PYTHON setup.py install ${SKBUILD_ARGS}
 fi

From 84efe0f7d364d3943671465a4bc7316e02007baf Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 16:35:08 -0500
Subject: [PATCH 019/129] Remember to set MKL_DIR and TBB_DIR

---
 conda-recipe/bld.bat  | 3 +++
 conda-recipe/build.sh | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat
index 99ba8b17f019..eac24f7ad07c 100644
--- a/conda-recipe/bld.bat
+++ b/conda-recipe/bld.bat
@@ -10,6 +10,9 @@ REM to cmake is complete.
 SET "SETUPTOOLS_USE_DISTUTILS=stdlib"
 
 "%PYTHON%" setup.py clean --all
+
+set "MKL_DIR=%CONDA_PREFIX%"
+set "TBB_DIR=%CONDA_PREFIX%"
 set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
 
 FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16) DO @(
diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh
index b2185fb37659..efd2b4b5d0e1 100644
--- a/conda-recipe/build.sh
+++ b/conda-recipe/build.sh
@@ -6,6 +6,8 @@ export ICPXCFG="$(pwd)/icpx_for_conda.cfg"
 export ICXCFG="$(pwd)/icpx_for_conda.cfg"
 
 export CMAKE_GENERATOR="Ninja"
+export TBB_DIR=$CONDA_PREFIX
+export MKL_DIR=$CONDA_PREFIX
 SKBUILD_ARGS="-- -DDPCTL_MODULE_PATH=$($PYTHON -m dpctl --cmakedir) -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
 
 # Build wheel package

From 840e3ff534447f6acf15c71378259b5fe6c8600d Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 22 Mar 2023 16:45:22 -0500
Subject: [PATCH 020/129] Vendor TBBConfig.cmake since it has only been added
 to tbb-devel in 2023.1.0 release

---
 CMakeLists.txt                             |   2 +-
 dpnp/backend/cmake/Modules/TBBConfig.cmake | 109 +++++++++++++++++++++
 2 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100644 dpnp/backend/cmake/Modules/TBBConfig.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b7ab4ca53cc4..d49afe3bd9bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${DPCTL_MODULE_PATH})
 
 
 find_package(IntelDPCPP REQUIRED)
-find_package(TBB REQUIRED)
+find_package(TBB REQUIRED HINTS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules)
 
 set(MKL_ARCH "intel64")
 set(MKL_LINK "dynamic")
diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake
new file mode 100644
index 000000000000..c2f5a3cf23df
--- /dev/null
+++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake
@@ -0,0 +1,109 @@
+# Copyright (c) 2017-2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# It defines the following variables:
+#     TBB_<component>_FOUND
+#     TBB_IMPORTED_TARGETS
+#
+# TBBConfigVersion.cmake defines TBB_VERSION
+#
+# Initialize to default values
+if (NOT TBB_IMPORTED_TARGETS)
+    set(TBB_IMPORTED_TARGETS "")
+endif()
+
+if (NOT TBB_FIND_COMPONENTS)
+    set(TBB_FIND_COMPONENTS "tbb;tbbmalloc;tbbmalloc_proxy")
+    foreach (_tbb_component ${TBB_FIND_COMPONENTS})
+        set(TBB_FIND_REQUIRED_${_tbb_component} 1)
+    endforeach()
+endif()
+
+get_filename_component(_tbb_root "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
+get_filename_component(_tbb_root "${_tbb_root}/../../.." ABSOLUTE)
+
+set(TBB_INTERFACE_VERSION )
+
+set(_tbb_bin_version 12)
+set(_tbbmalloc_bin_version 2)
+set(_tbbmalloc_proxy_bin_version 2)
+set(_tbbbind_bin_version 3)
+
+# Add components with internal dependencies: tbbmalloc_proxy -> tbbmalloc
+list(FIND TBB_FIND_COMPONENTS tbbmalloc_proxy _tbbmalloc_proxy_ix)
+if (NOT _tbbmalloc_proxy_ix EQUAL -1)
+    list(APPEND TBB_FIND_COMPONENTS tbbmalloc)
+    list(REMOVE_DUPLICATES TBB_FIND_COMPONENTS)
+    set(TBB_FIND_REQUIRED_tbbmalloc ${TBB_FIND_REQUIRED_tbbmalloc_proxy})
+endif()
+unset(_tbbmalloc_proxy_ix)
+
+if (CMAKE_SIZEOF_VOID_P STREQUAL "8")
+    set(_tbb_subdir intel64/gcc4.8)
+else ()
+    set(_tbb_subdir ia32/gcc4.8)
+endif()
+
+foreach (_tbb_component ${TBB_FIND_COMPONENTS})
+    set(TBB_${_tbb_component}_FOUND 0)
+    
+    get_filename_component(_tbb_release_lib "${_tbb_root}/lib/${_tbb_subdir}/lib${_tbb_component}${_bin_version}.so.${_${_tbb_component}_bin_version}" ABSOLUTE)
+
+    if (NOT TBB_FIND_RELEASE_ONLY)
+        get_filename_component(_tbb_debug_lib "${_tbb_root}/lib/${_tbb_subdir}/lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}" ABSOLUTE)
+    endif()
+
+    if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}")
+        if (NOT TARGET TBB::${_tbb_component})
+            add_library(TBB::${_tbb_component} SHARED IMPORTED)
+
+            get_filename_component(_tbb_include_dir "${_tbb_root}/include" ABSOLUTE)
+            set_target_properties(TBB::${_tbb_component} PROPERTIES
+                                  INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}")
+            unset(_tbb_current_realpath)
+            unset(_tbb_include_dir)
+
+            if (EXISTS "${_tbb_release_lib}")
+                set_target_properties(TBB::${_tbb_component} PROPERTIES
+                                      IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}")
+                set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
+            endif()
+
+            if (EXISTS "${_tbb_debug_lib}")
+                set_target_properties(TBB::${_tbb_component} PROPERTIES
+                                      IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}")
+                set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
+            endif()
+
+            # Add internal dependencies for imported targets: TBB::tbbmalloc_proxy -> TBB::tbbmalloc
+            if (_tbb_component STREQUAL tbbmalloc_proxy)
+                set_target_properties(TBB::tbbmalloc_proxy PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbbmalloc)
+            endif()
+        endif()
+        list(APPEND TBB_IMPORTED_TARGETS TBB::${_tbb_component})
+        set(TBB_${_tbb_component}_FOUND 1)
+    elseif (TBB_FIND_REQUIRED AND TBB_FIND_REQUIRED_${_tbb_component})
+        message(STATUS "Missed required oneTBB component: ${_tbb_component}")
+        if (TBB_FIND_RELEASE_ONLY)
+            message(STATUS "  ${_tbb_release_lib} must exist.")
+        else()
+            message(STATUS "  one or both of:\n   ${_tbb_release_lib}\n    ${_tbb_debug_lib}\n   files must exist.")
+        endif()
+        set(TBB_FOUND FALSE)
+    endif()
+endforeach()
+list(REMOVE_DUPLICATES TBB_IMPORTED_TARGETS)
+unset(_tbb_release_lib)
+unset(_tbb_debug_lib)
+unset(_tbb_root)

From 4e865770ec14486aa9fff8dec00c50f6c30eff62 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 23 Mar 2023 09:47:47 -0500
Subject: [PATCH 021/129] Tweaked TBBConfig to use find_library instead of
 get_filename_component

---
 dpnp/backend/cmake/Modules/README.md       | 5 ++++-
 dpnp/backend/cmake/Modules/TBBConfig.cmake | 8 ++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/dpnp/backend/cmake/Modules/README.md b/dpnp/backend/cmake/Modules/README.md
index b2d9bc9b50f3..02b4c40e0a4d 100644
--- a/dpnp/backend/cmake/Modules/README.md
+++ b/dpnp/backend/cmake/Modules/README.md
@@ -1,5 +1,8 @@
-# oneAPI DPL cmake script vendored from Intel oneAPI BaseKit 2023.0.0
+# oneAPI CMake scripts vendored from Intel oneAPI BaseKit 2023.0.0
 
 This is done to work around absence of this script in onedpl-devel conda
 package. Once it is added, expected 2023.2.0, this vendored package is
 to be removed.
+
+tbb-devel script has been modified to allow it to work correctly in conda
+environment.
\ No newline at end of file
diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake
index c2f5a3cf23df..53323e56bd5d 100644
--- a/dpnp/backend/cmake/Modules/TBBConfig.cmake
+++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake
@@ -58,10 +58,14 @@ endif()
 foreach (_tbb_component ${TBB_FIND_COMPONENTS})
     set(TBB_${_tbb_component}_FOUND 0)
     
-    get_filename_component(_tbb_release_lib "${_tbb_root}/lib/${_tbb_subdir}/lib${_tbb_component}${_bin_version}.so.${_${_tbb_component}_bin_version}" ABSOLUTE)
+    find_library(_tbb_release_lib NAMES lib${_tbb_component}${_bin_version}.so.${_${_tbb_component}_bin_version}
+                  PATHS ${_tbb_root}
+                  PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
 
     if (NOT TBB_FIND_RELEASE_ONLY)
-        get_filename_component(_tbb_debug_lib "${_tbb_root}/lib/${_tbb_subdir}/lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}" ABSOLUTE)
+        find_library(_tbb_debug_lib NAMES lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}
+                     PATHS ${_tbb_root}
+                     PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
     endif()
 
     if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}")

From bac4c4b60e74121a9f67cf1af70baf7f0f27ddf6 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 23 Mar 2023 11:16:17 -0500
Subject: [PATCH 022/129] Use find_path in oneDPLConfig.cmake

---
 dpnp/backend/cmake/Modules/TBBConfig.cmake    | 2 +-
 dpnp/backend/cmake/Modules/oneDPLConfig.cmake | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake
index 53323e56bd5d..be801684feb8 100644
--- a/dpnp/backend/cmake/Modules/TBBConfig.cmake
+++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake
@@ -65,7 +65,7 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS})
     if (NOT TBB_FIND_RELEASE_ONLY)
         find_library(_tbb_debug_lib NAMES lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}
                      PATHS ${_tbb_root}
-                     PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
+m                    PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
     endif()
 
     if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}")
diff --git a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
index a45a402a299e..d3e30d45d94e 100755
--- a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
+++ b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
@@ -22,7 +22,13 @@ else()
     set(_onedpl_headers_subdir linux)
 endif()
 
-get_filename_component(_onedpl_headers "${_onedpl_root}/${_onedpl_headers_subdir}/include" ABSOLUTE)
+
+find_path(_onedpl_header
+  NAMES include
+  PATHS ${_onedpl_root}
+  PATH_SUFFIXES "" ${_onedpl_headers_subdir}
+)
+
 
 if (EXISTS "${_onedpl_headers}")
     if (NOT TARGET oneDPL)

From 9ec7643519e7a46fe659b5cc16a69beaca2b2453 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 23 Mar 2023 11:18:44 -0500
Subject: [PATCH 023/129] Use . as possible suffix instead of empty string

---
 dpnp/backend/cmake/Modules/oneDPLConfig.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
index d3e30d45d94e..8b6fe883aa7e 100755
--- a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
+++ b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
@@ -26,7 +26,7 @@ endif()
 find_path(_onedpl_header
   NAMES include
   PATHS ${_onedpl_root}
-  PATH_SUFFIXES "" ${_onedpl_headers_subdir}
+  PATH_SUFFIXES "." ${_onedpl_headers_subdir}
 )
 
 

From a118769529f8a8987c852b47fa5d9bf62c34736f Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 23 Mar 2023 11:42:47 -0500
Subject: [PATCH 024/129] Set DPL_ROOT_HINT, MKL_ROOT_HINT, TBB_ROOT_HINT

---
 conda-recipe/bld.bat  | 6 ++++--
 conda-recipe/build.sh | 5 +++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat
index eac24f7ad07c..0cf7a1e1f6f8 100644
--- a/conda-recipe/bld.bat
+++ b/conda-recipe/bld.bat
@@ -11,8 +11,10 @@ SET "SETUPTOOLS_USE_DISTUTILS=stdlib"
 
 "%PYTHON%" setup.py clean --all
 
-set "MKL_DIR=%CONDA_PREFIX%"
-set "TBB_DIR=%CONDA_PREFIX%"
+set "MKL_ROOT_HINT=%CONDA_PREFIX%"
+set "TBB_ROOT_HINT=%CONDA_PREFIX%"
+set "MKL_ROOT_HINT=%CONDA_PREFIX%"
+set "DPL_ROOT_HINT=%CONDA_PREFIX%"
 set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
 
 FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16) DO @(
diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh
index efd2b4b5d0e1..ff97b397a1e9 100644
--- a/conda-recipe/build.sh
+++ b/conda-recipe/build.sh
@@ -6,8 +6,9 @@ export ICPXCFG="$(pwd)/icpx_for_conda.cfg"
 export ICXCFG="$(pwd)/icpx_for_conda.cfg"
 
 export CMAKE_GENERATOR="Ninja"
-export TBB_DIR=$CONDA_PREFIX
-export MKL_DIR=$CONDA_PREFIX
+export TBB_ROOT_HINT=$CONDA_PREFIX
+export DPL_ROOT_HINT=$CONDA_PREFIX
+export MKL_ROOT_HINT=$CONDA_PREFIX
 SKBUILD_ARGS="-- -DDPCTL_MODULE_PATH=$($PYTHON -m dpctl --cmakedir) -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
 
 # Build wheel package

From 2e44586083dc96ff209b6a24d41489fc23f2b5f1 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 23 Mar 2023 11:43:30 -0500
Subject: [PATCH 025/129] Adding HINTS to find_library/find_path calls, fixed
 typo

---
 dpnp/backend/cmake/Modules/TBBConfig.cmake    | 4 +++-
 dpnp/backend/cmake/Modules/oneDPLConfig.cmake | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake
index be801684feb8..24854d74e912 100644
--- a/dpnp/backend/cmake/Modules/TBBConfig.cmake
+++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake
@@ -60,12 +60,14 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS})
     
     find_library(_tbb_release_lib NAMES lib${_tbb_component}${_bin_version}.so.${_${_tbb_component}_bin_version}
                   PATHS ${_tbb_root}
+                  HINTS ENV TBB_ROOT_HINT
                   PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
 
     if (NOT TBB_FIND_RELEASE_ONLY)
         find_library(_tbb_debug_lib NAMES lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}
                      PATHS ${_tbb_root}
-m                    PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
+                     HINTS ENV TBB_ROOT_HINT
+                     PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
     endif()
 
     if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}")
diff --git a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
index 8b6fe883aa7e..ebb8da6e0345 100755
--- a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
+++ b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
@@ -23,9 +23,10 @@ else()
 endif()
 
 
-find_path(_onedpl_header
+find_path(_onedpl_headers
   NAMES include
   PATHS ${_onedpl_root}
+  HITNS ENV DPL_ROOT_HINT
   PATH_SUFFIXES "." ${_onedpl_headers_subdir}
 )
 

From a96f6a0343fa542f993ef1b56e793b468aa4f25d Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 23 Mar 2023 12:07:12 -0500
Subject: [PATCH 026/129] Add logic to set DPCTL_MODULE_PATH in bld.bat

---
 conda-recipe/bld.bat | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat
index 0cf7a1e1f6f8..7f4a5bacb867 100644
--- a/conda-recipe/bld.bat
+++ b/conda-recipe/bld.bat
@@ -13,9 +13,14 @@ SET "SETUPTOOLS_USE_DISTUTILS=stdlib"
 
 set "MKL_ROOT_HINT=%CONDA_PREFIX%"
 set "TBB_ROOT_HINT=%CONDA_PREFIX%"
-set "MKL_ROOT_HINT=%CONDA_PREFIX%"
 set "DPL_ROOT_HINT=%CONDA_PREFIX%"
-set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
+
+%PYTHON% -m dpctl --cmakedir > Output
+set /p DPCTL_CMAKE_DIR= < Output
+
+set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx"
+set "SKBUILD_ARGS=%SKBUILD_ARGS% -DDPCTL_MODULE_PATH:PATH=%DPCTL_CMAKE_DIR% "
+set "SKBUILD_ARGS=%SKBUILD_ARGS% -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
 
 FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16) DO @(
   REM set DIR_HINT if directory exists

From e04c08116b8007a877ce9794f726fa4d6c0ff128 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 23 Mar 2023 12:09:07 -0500
Subject: [PATCH 027/129] Install scikit-build in build-sphinx.yml

---
 .github/workflows/build-sphinx.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml
index 95db7b640f1f..6aab25b929de 100644
--- a/.github/workflows/build-sphinx.yml
+++ b/.github/workflows/build-sphinx.yml
@@ -74,7 +74,7 @@ jobs:
       - name: Install dpnp dependencies
         run: |
           conda install dpctl mkl-devel-dpcpp onedpl-devel tbb-devel dpcpp_linux-64 \
-              cmake cython pytest -c dppy/label/dev -c intel -c conda-forge
+              cmake cython pytest scikit-build -c dppy/label/dev -c intel -c conda-forge
 
       - name: Install cuPy dependencies
         run: conda install -c conda-forge cupy cudatoolkit=10.0

From 43d07e61e50719294736760909a27facedb32670 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 23 Mar 2023 12:14:17 -0500
Subject: [PATCH 028/129] Corrected build command in build-sphinx, also install
 ninja

---
 .github/workflows/build-sphinx.yml | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml
index 6aab25b929de..f6664f66c1f0 100644
--- a/.github/workflows/build-sphinx.yml
+++ b/.github/workflows/build-sphinx.yml
@@ -74,21 +74,20 @@ jobs:
       - name: Install dpnp dependencies
         run: |
           conda install dpctl mkl-devel-dpcpp onedpl-devel tbb-devel dpcpp_linux-64 \
-              cmake cython pytest scikit-build -c dppy/label/dev -c intel -c conda-forge
+              cmake cython pytest ninja scikit-build -c dppy/label/dev -c intel -c conda-forge
 
       - name: Install cuPy dependencies
         run: conda install -c conda-forge cupy cudatoolkit=10.0
 
       - name: Conda info
-        run: |
-          conda info
-          conda list
+        run: conda info
+
+      - name: Conda list
+        run: conda list
 
       - name: Build library
         run: |
-          python setup.py build_clib
-          CC=icpx python setup.py build_ext --inplace
-          python setup.py develop
+          CC=icx CXX=icpx python setup.py develop -G Ninja -- -DDPCTL_MODULE_PATH=$(python -m dpctl --cmakedir)
 
       - name: Build docs
         run: make html

From d56b0b683d906d7ae9e415bac51901c1e9a5c876 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 23 Mar 2023 12:58:11 -0500
Subject: [PATCH 029/129] Replaced stray get_filename_component with find_path

---
 dpnp/backend/cmake/Modules/TBBConfig.cmake | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake
index 24854d74e912..9b02dde8fcb7 100644
--- a/dpnp/backend/cmake/Modules/TBBConfig.cmake
+++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake
@@ -74,7 +74,12 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS})
         if (NOT TARGET TBB::${_tbb_component})
             add_library(TBB::${_tbb_component} SHARED IMPORTED)
 
-            get_filename_component(_tbb_include_dir "${_tbb_root}/include" ABSOLUTE)
+	    find_path(_tbb_include_dir
+	      NAMES include
+	      PATHS ${_tbb_root}
+	      HITNS ENV TBB_ROOT_HINT
+	      )
+	    
             set_target_properties(TBB::${_tbb_component} PROPERTIES
                                   INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}")
             unset(_tbb_current_realpath)

From 45c6c116c6b3d0b02f1447919497004fc6956bfd Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 24 Mar 2023 06:54:16 -0500
Subject: [PATCH 030/129] Ensure to use vendored TBBConfig.cmake and
 oneDPLConfig.cmake

This must be done to work around issues in the cmake scripts included
in the respective tbb-devel and onedpl-devel conda packages.
---
 CMakeLists.txt                             | 4 ++--
 dpnp/backend/cmake/Modules/TBBConfig.cmake | 9 +++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d49afe3bd9bf..cd537070ec78 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${DPCTL_MODULE_PATH})
 
 
 find_package(IntelDPCPP REQUIRED)
-find_package(TBB REQUIRED HINTS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules)
+find_package(TBB REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH)
 
 set(MKL_ARCH "intel64")
 set(MKL_LINK "dynamic")
@@ -36,7 +36,7 @@ set(MKL_INTERFACE_FULL "intel_ilp64")
 set(MKL_THREADING "tbb_thread")
 find_package(MKL REQUIRED)
 
-find_package(oneDPL REQUIRED HINTS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules)
+find_package(oneDPL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH)
 
 include(GNUInstallDirs)
 
diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake
index 9b02dde8fcb7..2b0593e59817 100644
--- a/dpnp/backend/cmake/Modules/TBBConfig.cmake
+++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake
@@ -57,14 +57,14 @@ endif()
 
 foreach (_tbb_component ${TBB_FIND_COMPONENTS})
     set(TBB_${_tbb_component}_FOUND 0)
-    
-    find_library(_tbb_release_lib NAMES lib${_tbb_component}${_bin_version}.so.${_${_tbb_component}_bin_version}
+
+    find_library(_tbb_release_lib lib${_tbb_component}${_bin_version}.so.${_${_tbb_component}_bin_version}
                   PATHS ${_tbb_root}
                   HINTS ENV TBB_ROOT_HINT
                   PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
 
     if (NOT TBB_FIND_RELEASE_ONLY)
-        find_library(_tbb_debug_lib NAMES lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}
+        find_library(_tbb_debug_lib lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}
                      PATHS ${_tbb_root}
                      HINTS ENV TBB_ROOT_HINT
                      PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
@@ -75,8 +75,9 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS})
             add_library(TBB::${_tbb_component} SHARED IMPORTED)
 
 	    find_path(_tbb_include_dir
-	      NAMES include
+	      oneapi/tbb.h
 	      PATHS ${_tbb_root}
+	      PATH_SUFFIXES include
 	      HITNS ENV TBB_ROOT_HINT
 	      )
 	    

From d9b0a2d474a8adaf5961874fc61d409dce9e9265 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 24 Mar 2023 09:21:39 -0500
Subject: [PATCH 031/129] HINTs should point to %CONDA_PREFIX%/Library

---
 conda-recipe/bld.bat | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat
index 7f4a5bacb867..17bff70138dc 100644
--- a/conda-recipe/bld.bat
+++ b/conda-recipe/bld.bat
@@ -11,9 +11,9 @@ SET "SETUPTOOLS_USE_DISTUTILS=stdlib"
 
 "%PYTHON%" setup.py clean --all
 
-set "MKL_ROOT_HINT=%CONDA_PREFIX%"
-set "TBB_ROOT_HINT=%CONDA_PREFIX%"
-set "DPL_ROOT_HINT=%CONDA_PREFIX%"
+set "MKL_ROOT_HINT=%CONDA_PREFIX%/Library"
+set "TBB_ROOT_HINT=%CONDA_PREFIX%/Library"
+set "DPL_ROOT_HINT=%CONDA_PREFIX%/Library"
 
 %PYTHON% -m dpctl --cmakedir > Output
 set /p DPCTL_CMAKE_DIR= < Output

From 5257e0f4805df25d9fbeb4ef189a99326bc268f2 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 24 Mar 2023 09:21:46 -0500
Subject: [PATCH 032/129] Fixed typos HITNS->HINTS, improved find_path call for
 _onedpl_headers

Also allows find_library in TBBConfig.cmake to find Windows libraries.
---
 dpnp/backend/cmake/Modules/TBBConfig.cmake    | 8 ++++----
 dpnp/backend/cmake/Modules/oneDPLConfig.cmake | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake
index 2b0593e59817..6eeaba70d1eb 100644
--- a/dpnp/backend/cmake/Modules/TBBConfig.cmake
+++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake
@@ -32,7 +32,7 @@ endif()
 
 get_filename_component(_tbb_root "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
 get_filename_component(_tbb_root "${_tbb_root}/../../.." ABSOLUTE)
-
+         
 set(TBB_INTERFACE_VERSION )
 
 set(_tbb_bin_version 12)
@@ -58,13 +58,13 @@ endif()
 foreach (_tbb_component ${TBB_FIND_COMPONENTS})
     set(TBB_${_tbb_component}_FOUND 0)
 
-    find_library(_tbb_release_lib lib${_tbb_component}${_bin_version}.so.${_${_tbb_component}_bin_version}
+    find_library(_tbb_release_lib NAMES ${_tbb_component}${_bin_version} lib${_tbb_component}${_bin_version}.so.${_${_tbb_component}_bin_version}
                   PATHS ${_tbb_root}
                   HINTS ENV TBB_ROOT_HINT
                   PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
 
     if (NOT TBB_FIND_RELEASE_ONLY)
-        find_library(_tbb_debug_lib lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}
+        find_library(_tbb_debug_lib NAMES ${_tbb_component}${_bin_version}_debug lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}  # bare name first, then the versioned .so name
                      PATHS ${_tbb_root}
                      HINTS ENV TBB_ROOT_HINT
                      PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
@@ -78,7 +78,7 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS})
 	      oneapi/tbb.h
 	      PATHS ${_tbb_root}
 	      PATH_SUFFIXES include
-	      HITNS ENV TBB_ROOT_HINT
+	      HINTS ENV TBB_ROOT_HINT
 	      )
 	    
             set_target_properties(TBB::${_tbb_component} PROPERTIES
diff --git a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
index ebb8da6e0345..833c298be84e 100755
--- a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
+++ b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
@@ -24,10 +24,10 @@ endif()
 
 
 find_path(_onedpl_headers
-  NAMES include
+  NAMES oneapi/dpl
   PATHS ${_onedpl_root}
-  HITNS ENV DPL_ROOT_HINT
-  PATH_SUFFIXES "." ${_onedpl_headers_subdir}
+  HINTS ENV DPL_ROOT_HINT
+  PATH_SUFFIXES include ${_onedpl_headers_subdir}/include
 )
 
 

From a9a5b4b0c5b6619f979c11dd1b75fb50c025f9b5 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 24 Mar 2023 11:02:51 -0500
Subject: [PATCH 033/129] Vendor MKLConfig.cmake too to fix issue with
 MKL_DLL_FILE search paths

---
 CMakeLists.txt                             |   2 +-
 dpnp/backend/cmake/Modules/MKLConfig.cmake | 851 +++++++++++++++++++++
 2 files changed, 852 insertions(+), 1 deletion(-)
 create mode 100644 dpnp/backend/cmake/Modules/MKLConfig.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cd537070ec78..581b903b6cc7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,7 +34,7 @@ set(MKL_ARCH "intel64")
 set(MKL_LINK "dynamic")
 set(MKL_INTERFACE_FULL "intel_ilp64")
 set(MKL_THREADING "tbb_thread")
-find_package(MKL REQUIRED)
+find_package(MKL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH)
 
 find_package(oneDPL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH)
 
diff --git a/dpnp/backend/cmake/Modules/MKLConfig.cmake b/dpnp/backend/cmake/Modules/MKLConfig.cmake
new file mode 100644
index 000000000000..38b3e72fb443
--- /dev/null
+++ b/dpnp/backend/cmake/Modules/MKLConfig.cmake
@@ -0,0 +1,851 @@
+#===============================================================================
+# Copyright 2021-2022 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted  materials,  and
+# your use of  them is  governed by the  express license  under which  they were
+# provided to you (License).  Unless the License provides otherwise, you may not
+# use, modify, copy, publish, distribute,  disclose or transmit this software or
+# the related documents without Intel's prior written permission.
+#
+# This software and the related documents  are provided as  is,  with no express
+# or implied  warranties,  other  than those  that are  expressly stated  in the
+# License.
+#===============================================================================
+
+#===================================================================
+# CMake Config file for Intel(R) oneAPI Math Kernel Library (oneMKL)
+#===================================================================
+
+#===============================================================================
+# Input parameters
+#=================
+#-------------
+# Main options
+#-------------
+# MKL_ROOT: oneMKL root directory (May be required for non-standard install locations. Optional otherwise.)
+#    Default: use location from MKLROOT environment variable or <Full path to this file>/../../../ if MKLROOT is not defined
+# MKL_ARCH
+#    Values:  ia32 intel64
+#    Default: intel64
+# MKL_LINK
+#    Values:  static, dynamic, sdl
+#    Default: dynamic
+#       Exceptions:- DPC++ doesn't support sdl
+# MKL_THREADING
+#    Values:  sequential,
+#             intel_thread (Intel OpenMP),
+#             gnu_thread (GNU OpenMP),
+#             pgi_thread (PGI OpenMP),
+#             tbb_thread
+#    Default: intel_thread
+#       Exceptions:- DPC++ defaults to tbb, PGI compiler on Windows defaults to pgi_thread
+# MKL_INTERFACE (for MKL_ARCH=intel64 only)
+#    Values:  lp64, ilp64
+#       GNU or INTEL interface will be selected based on Compiler.
+#    Default: ilp64
+# MKL_MPI
+#    Values:  intelmpi, mpich, openmpi, msmpi, mshpc
+#    Default: intelmpi
+#-----------------------------------
+# Special options (OFF by default)
+#-----------------------------------
+# ENABLE_BLAS95:      Enables BLAS Fortran95 API
+# ENABLE_LAPACK95:    Enables LAPACK Fortran95 API
+# ENABLE_BLACS:       Enables cluster BLAS library
+# ENABLE_CDFT:        Enables cluster DFT library
+# ENABLE_CPARDISO:    Enables cluster PARDISO functionality
+# ENABLE_SCALAPACK:   Enables cluster LAPACK library
+# ENABLE_OMP_OFFLOAD: Enables OpenMP Offload functionality
+#
+#==================
+# Output parameters
+#==================
+# MKL_ROOT
+#     oneMKL root directory.
+# MKL_INCLUDE
+#     Use of target_include_directories() is recommended.
+#     INTERFACE_INCLUDE_DIRECTORIES property is set on mkl_core and mkl_rt libraries.
+#     Alternatively, this variable can be used directly (not recommended as per Modern CMake)
+# MKL_ENV
+#     Provides all environment variables based on input parameters.
+#     Currently useful for mkl_rt linking and BLACS on Windows.
+#     Must be set as an ENVIRONMENT property.
+# Example:
+#     add_test(NAME mytest COMMAND myexe)
+#     if(MKL_ENV)
+#       set_tests_properties(mytest PROPERTIES ENVIRONMENT "${MKL_ENV}")
+#     endif()
+#
+# MKL::<library name>
+#     IMPORTED targets to link MKL libraries individually or when using a custom link-line.
+#     mkl_core and mkl_rt have INTERFACE_* properties set to them.
+#     Please refer to Intel(R) oneMKL Link Line Advisor for help with linking.
+#
+# Below INTERFACE targets provide full link-lines for direct use.
+# Example:
+#     target_link_options(<my_linkable_target> PUBLIC $<LINK_ONLY:MKL::MKL>)
+#
+# MKL::MKL
+#     Link line for C and Fortran API
+# MKL::MKL_DPCPP
+#     Link line for DPC++ API
+#
+# Note: For Device API, library linking is not required.
+#       Compile options can be added from the INTERFACE_COMPILE_OPTIONS property on MKL::MKL_DPCPP
+#       Include directories can be added from the INTERFACE_INCLUDE_DIRECTORIES property on MKL::MKL_DPCPP
+#
+# Note: Output parameters' and targets' availability can change
+# based on Input parameters and application project languages.
+#===============================================================================
+
+function(mkl_message MSG_MODE MSG_TEXT)  # message() wrapper: prints MSG_TEXT with mode MSG_MODE, honouring MKL_FIND_QUIETLY
+  if(MSG_MODE STREQUAL "FATAL_ERROR")  # fatal errors are always emitted
+    message(${MSG_MODE} ${MSG_TEXT})
+  else()
+    if(NOT MKL_FIND_QUIETLY)  # any other mode is suppressed when MKL_FIND_QUIETLY is true
+      message(${MSG_MODE} ${MSG_TEXT})
+    endif()
+  endif()
+endfunction()
+
+if(${CMAKE_VERSION} VERSION_LESS "3.13")
+  mkl_message(FATAL_ERROR "The minimum supported CMake version is 3.13. You are running version ${CMAKE_VERSION}")
+endif()
+
+include_guard()
+include(FindPackageHandleStandardArgs)
+
+if(NOT MKL_LIBRARIES)
+
+# Set CMake policies for well-defined behavior across CMake versions
+cmake_policy(SET CMP0011 NEW)
+cmake_policy(SET CMP0057 NEW)
+
+# Project Languages
+get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
+list(APPEND MKL_LANGS C CXX Fortran)
+foreach(lang ${languages})
+  if(${lang} IN_LIST MKL_LANGS)
+    list(APPEND CURR_LANGS ${lang})
+  endif()
+endforeach()
+list(REMOVE_DUPLICATES CURR_LANGS)
+
+option(ENABLE_BLAS95      "Enables BLAS Fortran95 API"            OFF)
+option(ENABLE_LAPACK95    "Enables LAPACK Fortran95 API"          OFF)
+option(ENABLE_BLACS       "Enables cluster BLAS library"          OFF)
+option(ENABLE_CDFT        "Enables cluster DFT library"           OFF)
+option(ENABLE_CPARDISO    "Enables cluster PARDISO functionality" OFF)
+option(ENABLE_SCALAPACK   "Enables cluster LAPACK library"        OFF)
+option(ENABLE_OMP_OFFLOAD "Enables OpenMP Offload functionality"  OFF)
+
+# Use MPI if any of these are enabled
+if(ENABLE_BLACS OR ENABLE_CDFT OR ENABLE_SCALAPACK OR ENABLE_CPARDISO)
+  set(USE_MPI ON)
+endif()
+
+# Check Parameters
+function(define_param TARGET_PARAM DEFAULT_PARAM SUPPORTED_LIST)  # all three arguments are variable NAMES, dereferenced below
+  if(NOT DEFINED ${TARGET_PARAM} AND NOT DEFINED ${DEFAULT_PARAM})  # neither a value nor a default: only report it
+    mkl_message(STATUS "${TARGET_PARAM}: Undefined")
+  elseif(NOT DEFINED ${TARGET_PARAM} AND DEFINED ${DEFAULT_PARAM})  # no value but a default exists: adopt the default
+    set(${TARGET_PARAM} "${${DEFAULT_PARAM}}" CACHE STRING "Choose ${TARGET_PARAM} options are: ${${SUPPORTED_LIST}}")
+    foreach(opt ${${DEFAULT_PARAM}})  # build a space-separated copy of the default for the status text
+      set(STR_LIST "${STR_LIST} ${opt}")
+    endforeach()
+    mkl_message(STATUS "${TARGET_PARAM}: None, set to `${STR_LIST}` by default")
+  elseif(${SUPPORTED_LIST})  # value is defined and the variable named by SUPPORTED_LIST is truthy: validate
+    set(ITEM_FOUND 1)
+    foreach(opt ${${TARGET_PARAM}})  # the value may itself be a list; each item is checked
+      if(NOT ${opt} IN_LIST ${SUPPORTED_LIST})
+        set(ITEM_FOUND 0)  # one unsupported item invalidates the whole value
+      endif()
+    endforeach()
+    if(ITEM_FOUND EQUAL 0)
+      foreach(opt ${${SUPPORTED_LIST}})  # list the accepted options in the error text
+        set(STR_LIST "${STR_LIST} ${opt}")
+      endforeach()
+      mkl_message(FATAL_ERROR "Invalid ${TARGET_PARAM} `${${TARGET_PARAM}}`, options are: ${STR_LIST}")
+    else()
+      mkl_message(STATUS "${TARGET_PARAM}: ${${TARGET_PARAM}}")
+    endif()
+  else()  # value is defined but there is nothing to validate against: report it as-is
+    mkl_message(STATUS "${TARGET_PARAM}: ${${TARGET_PARAM}}")
+  endif()
+endfunction()
+
+#================
+# Compiler checks
+#================
+
+if(CMAKE_C_COMPILER)
+  get_filename_component(C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME)
+endif()
+if(CMAKE_CXX_COMPILER)
+  get_filename_component(CXX_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME)
+endif()
+if(CMAKE_Fortran_COMPILER)
+  get_filename_component(Fortran_COMPILER_NAME ${CMAKE_Fortran_COMPILER} NAME)
+endif()
+
+# Determine Compiler Family
+if(CXX_COMPILER_NAME STREQUAL "dpcpp" OR CXX_COMPILER_NAME STREQUAL "dpcpp.exe"
+    OR CXX_COMPILER_NAME STREQUAL "icpx" OR CXX_COMPILER_NAME STREQUAL "icx.exe")
+  set(DPCPP_COMPILER ON)
+endif()
+if(C_COMPILER_NAME MATCHES "^clang")
+  set(CLANG_COMPILER ON)
+endif()
+if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
+  set(PGI_COMPILER ON)
+elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_Fortran_COMPILER_ID STREQUAL "Intel"
+        OR CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM" OR CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM")
+  set(INTEL_COMPILER ON)
+else()
+  if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
+    set(GNU_C_COMPILER ON)
+  endif()
+  if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
+    set(GNU_Fortran_COMPILER ON)
+  endif()
+endif()
+
+if(USE_MPI AND (C_COMPILER_NAME MATCHES "^mpi" OR Fortran_COMPILER_NAME MATCHES "^mpi"))
+  set(USE_MPI_SCRIPT ON)
+endif()
+
+#================
+
+#================
+# System-specific
+#================
+
+# Extensions
+if(UNIX)
+  set(LIB_PREFIX "lib")
+  set(LIB_EXT ".a")
+  set(DLL_EXT ".so")
+  if(APPLE)
+    set(DLL_EXT ".dylib")
+  endif()
+  set(LINK_PREFIX "-l")
+  set(LINK_SUFFIX "")
+else()
+  set(LIB_PREFIX "")
+  set(LIB_EXT ".lib")
+  set(DLL_EXT "_dll.lib")
+  set(LINK_PREFIX "")
+  set(LINK_SUFFIX ".lib")
+endif()
+
+# Set target system architecture
+set(DEFAULT_MKL_ARCH intel64)
+if(DPCPP_COMPILER OR PGI_COMPILER OR ENABLE_OMP_OFFLOAD OR USE_MPI)
+  set(MKL_ARCH_LIST intel64)
+else()
+  set(MKL_ARCH_LIST ia32 intel64)
+endif()
+define_param(MKL_ARCH DEFAULT_MKL_ARCH MKL_ARCH_LIST)
+
+#================
+
+#==========
+# Setup MKL
+#==========
+
+# Set MKL_ROOT directory
+if(NOT DEFINED MKL_ROOT)
+  if(DEFINED ENV{MKLROOT})
+    set(MKL_ROOT $ENV{MKLROOT})
+  else()
+    get_filename_component(MKL_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
+    get_filename_component(MKL_ROOT "${MKL_CMAKE_PATH}/../../../" ABSOLUTE)
+    mkl_message(STATUS "MKL_ROOT ${MKL_ROOT}")
+  endif()
+endif()
+string(REPLACE "\\" "/" MKL_ROOT ${MKL_ROOT})
+
+# Define MKL_LINK
+set(DEFAULT_MKL_LINK dynamic)
+if(DPCPP_COMPILER OR USE_MPI)
+  set(MKL_LINK_LIST static dynamic)
+else()
+  set(MKL_LINK_LIST static dynamic sdl)
+endif()
+define_param(MKL_LINK DEFAULT_MKL_LINK MKL_LINK_LIST)
+
+# Define MKL_INTERFACE
+if(MKL_ARCH STREQUAL "intel64")
+  set(IFACE_TYPE intel)
+  if(GNU_Fortran_COMPILER)
+    set(IFACE_TYPE gf)
+  endif()
+  if(DPCPP_COMPILER)
+    if(MKL_INTERFACE)
+      set(MKL_INTERFACE_FULL intel_${MKL_INTERFACE})
+    endif()
+    set(DEFAULT_MKL_INTERFACE intel_ilp64)
+    set(MKL_INTERFACE_LIST intel_ilp64)
+  else()
+    if(MKL_INTERFACE)
+      set(MKL_INTERFACE_FULL ${IFACE_TYPE}_${MKL_INTERFACE})
+    endif()
+    set(DEFAULT_MKL_INTERFACE ${IFACE_TYPE}_ilp64)
+    set(MKL_INTERFACE_LIST ${IFACE_TYPE}_ilp64 ${IFACE_TYPE}_lp64)
+  endif()
+  define_param(MKL_INTERFACE_FULL DEFAULT_MKL_INTERFACE MKL_INTERFACE_LIST)
+else()
+  if(WIN32)
+    set(MKL_INTERFACE_FULL intel_c)
+  elseif(NOT APPLE)
+    if(GNU_Fortran_COMPILER)
+      set(MKL_INTERFACE_FULL gf)
+    else()
+      set(MKL_INTERFACE_FULL intel)
+    endif()
+  else()
+    mkl_message(FATAL_ERROR "OSX does not support MKL_ARCH ia32.")
+  endif()
+endif()
+if(MKL_INTERFACE_FULL MATCHES "ilp64")
+  set(MKL_INTERFACE "ilp64")
+else()
+  set(MKL_INTERFACE "lp64")
+endif()
+
+# Define MKL headers
+find_path(MKL_H mkl.h
+  HINTS ${MKL_ROOT}
+  PATH_SUFFIXES include)
+list(APPEND MKL_INCLUDE ${MKL_H})
+
+# Add pre-built F95 Interface Modules
+if(INTEL_COMPILER AND (ENABLE_BLAS95 OR ENABLE_LAPACK95))
+  if(MKL_ARCH STREQUAL "intel64")
+    list(APPEND MKL_INCLUDE "${MKL_ROOT}/include/${MKL_ARCH}/${MKL_INTERFACE}")
+  else()
+    list(APPEND MKL_INCLUDE "${MKL_ROOT}/include/${MKL_ARCH}")
+  endif()
+endif()
+
+# Define MKL_THREADING
+# All APIs support sequential threading
+set(MKL_THREADING_LIST "sequential" "intel_thread" "tbb_thread")
+set(DEFAULT_MKL_THREADING intel_thread)
+# DPC++ API supports TBB threading, but not OpenMP threading
+if(DPCPP_COMPILER)
+  set(DEFAULT_MKL_THREADING tbb_thread)
+  list(REMOVE_ITEM MKL_THREADING_LIST intel_thread)
+# C, Fortran API
+elseif(PGI_COMPILER)
+  # PGI compiler supports PGI OpenMP threading, additionally
+  list(APPEND MKL_THREADING_LIST pgi_thread)
+  # PGI compiler does not support TBB threading
+  list(REMOVE_ITEM MKL_THREADING_LIST tbb_thread)
+  if(WIN32)
+    # PGI 19.10 and 20.1 on Windows, do not support Intel OpenMP threading
+    list(REMOVE_ITEM MKL_THREADING_LIST intel_thread)
+    set(DEFAULT_MKL_THREADING pgi_thread)
+  endif()
+elseif(GNU_C_COMPILER OR GNU_Fortran_COMPILER OR CLANG_COMPILER)
+  list(APPEND MKL_THREADING_LIST gnu_thread)
+else()
+  # Intel and Microsoft compilers
+  # Nothing to do, only for completeness
+endif()
+define_param(MKL_THREADING DEFAULT_MKL_THREADING MKL_THREADING_LIST)
+
+# Define MKL_MPI
+set(DEFAULT_MKL_MPI intelmpi)
+if(UNIX)
+  if(APPLE)
+    # Override defaults for OSX
+    set(DEFAULT_MKL_MPI mpich)
+    set(MKL_MPI_LIST mpich)
+  else()
+    set(MKL_MPI_LIST intelmpi openmpi mpich mpich2)
+  endif()
+else()
+  # Windows
+  set(MKL_MPI_LIST intelmpi mshpc msmpi)
+endif()
+define_param(MKL_MPI DEFAULT_MKL_MPI MKL_MPI_LIST)
+# MSMPI is now called MSHPC. MSMPI option exists for backward compatibility.
+if(MKL_MPI STREQUAL "mshpc")
+  set(MKL_MPI msmpi)
+endif()
+find_package_handle_standard_args(MKL REQUIRED_VARS MKL_MPI)
+
+# Checkpoint - Verify if required options are defined
+find_package_handle_standard_args(MKL REQUIRED_VARS MKL_ROOT MKL_ARCH MKL_INCLUDE MKL_LINK MKL_THREADING MKL_INTERFACE_FULL)
+
+# Provides a list of IMPORTED targets for the project
+if(NOT DEFINED MKL_IMPORTED_TARGETS)
+  set(MKL_IMPORTED_TARGETS "")
+endif()
+
+# Clear temporary variables
+set(MKL_C_COPT "")
+set(MKL_F_COPT "")
+set(MKL_SDL_COPT "")
+set(MKL_CXX_COPT "")
+set(MKL_DPCPP_COPT "")
+set(MKL_DPCPP_LOPT "")
+set(MKL_OFFLOAD_COPT "")
+set(MKL_OFFLOAD_LOPT "")
+
+set(MKL_SUPP_LINK "")    # Other link options. Usually at the end of the link-line.
+set(MKL_LINK_LINE)       # For MPI only
+set(MKL_ENV_PATH "")     # Temporary variable to work with PATH
+set(MKL_ENV "")          # Exported environment variables
+
+# Modify PATH variable to make it CMake-friendly
+set(OLD_PATH $ENV{PATH})
+string(REPLACE ";" "\;" OLD_PATH "${OLD_PATH}")
+
+# Compiler options
+if(GNU_C_COMPILER OR GNU_Fortran_COMPILER)
+  if(MKL_ARCH STREQUAL "ia32")
+    list(APPEND MKL_C_COPT -m32)
+    list(APPEND MKL_F_COPT -m32)
+  else()
+    list(APPEND MKL_C_COPT -m64)
+    list(APPEND MKL_F_COPT -m64)
+  endif()
+endif()
+
+# Additional compiler & linker options
+if(CXX_COMPILER_NAME STREQUAL "icpx" OR CXX_COMPILER_NAME STREQUAL "icx.exe")
+  list(APPEND MKL_DPCPP_COPT "-fsycl")
+  list(APPEND MKL_DPCPP_LOPT "-fsycl")
+endif()
+if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD)
+  if(MKL_LINK STREQUAL "static")
+    list(APPEND MKL_DPCPP_LOPT "-fsycl-device-code-split=per_kernel")
+    list(APPEND MKL_OFFLOAD_LOPT "-fsycl-device-code-split=per_kernel")
+  endif()
+endif()
+
+# For OpenMP Offload
+if(ENABLE_OMP_OFFLOAD)
+  if(WIN32)
+    if(OPENMP_VERSION VERSION_GREATER_EQUAL "5.1")
+      if("Fortran" IN_LIST CURR_LANGS)
+        list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64 -DONEMKL_USE_OPENMP_VERSION=202011)
+      else()
+        list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64 -Qopenmp-version:51 -DONEMKL_USE_OPENMP_VERSION=202011)
+      endif()
+    else()
+      list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64)
+    endif()
+    # -MD and -MDd are manually added here because offload functionality uses DPC++ runtime.
+    if(CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo")
+      list(APPEND MKL_OFFLOAD_COPT -MDd)
+    else()
+      list(APPEND MKL_OFFLOAD_COPT -MD)
+    endif()
+    list(APPEND MKL_OFFLOAD_LOPT -Qiopenmp -Qopenmp-targets:spir64 -fsycl)
+    set(SKIP_LIBPATH ON)
+  else()
+    if(OPENMP_VERSION VERSION_GREATER_EQUAL "5.1")
+      if("Fortran" IN_LIST CURR_LANGS)
+        list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64 -DONEMKL_USE_OPENMP_VERSION=202011)
+      else()
+        list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64 -fopenmp-version=51 -DONEMKL_USE_OPENMP_VERSION=202011)
+      endif()
+    else ()
+      list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64)
+    endif()
+    list(APPEND MKL_OFFLOAD_LOPT -fiopenmp -fopenmp-targets=spir64 -fsycl)
+    if(APPLE)
+      list(APPEND MKL_SUPP_LINK -lc++)
+    else()
+      list(APPEND MKL_SUPP_LINK -lstdc++)
+    endif()
+  endif()
+endif()
+
+# For selected Interface
+if(MKL_INTERFACE_FULL)
+  if(MKL_ARCH STREQUAL "ia32")
+    if(GNU_Fortran_COMPILER)
+      set(MKL_SDL_IFACE_ENV "GNU")
+    endif()
+  else()
+    if(GNU_Fortran_COMPILER)
+      set(MKL_SDL_IFACE_ENV "GNU,${MKL_INTERFACE}")
+    else()
+      set(MKL_SDL_IFACE_ENV "${MKL_INTERFACE}")
+    endif()
+    if(MKL_INTERFACE STREQUAL "ilp64")
+      if("Fortran" IN_LIST CURR_LANGS)
+        if(INTEL_COMPILER)
+          if(WIN32)
+            list(APPEND MKL_F_COPT "-4I8")
+          else()
+            list(APPEND MKL_F_COPT "-i8")
+          endif()
+        elseif(GNU_Fortran_COMPILER)
+          list(APPEND MKL_F_COPT "-fdefault-integer-8")
+        elseif(PGI_COMPILER)
+          list(APPEND MKL_F_COPT "-i8")
+        endif()
+      endif()
+      list(INSERT MKL_C_COPT 0 "-DMKL_ILP64")
+      list(INSERT MKL_SDL_COPT 0 "-DMKL_ILP64")
+      list(INSERT MKL_CXX_COPT 0 "-DMKL_ILP64")
+      list(INSERT MKL_OFFLOAD_COPT 0 "-DMKL_ILP64")
+    else()
+      # lp64
+    endif()
+  endif()
+  if(MKL_SDL_IFACE_ENV)
+    string(TOUPPER ${MKL_SDL_IFACE_ENV} MKL_SDL_IFACE_ENV)
+  endif()
+endif() # MKL_INTERFACE_FULL
+
+# All MKL Libraries
+if(WIN32 AND CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo")
+  set(MKL_SYCL          mkl_sycld)
+else()
+  set(MKL_SYCL          mkl_sycl)
+endif()
+set(MKL_IFACE_LIB     mkl_${MKL_INTERFACE_FULL})
+set(MKL_CORE          mkl_core)
+if(WIN32 AND CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo" AND MKL_THREADING STREQUAL "tbb_thread")
+  set(MKL_THREAD        mkl_tbb_threadd)
+else()
+  set(MKL_THREAD        mkl_${MKL_THREADING})
+endif()
+set(MKL_SDL           mkl_rt)
+if(MKL_ARCH STREQUAL "ia32")
+  set(MKL_BLAS95      mkl_blas95)
+  set(MKL_LAPACK95    mkl_lapack95)
+else()
+  set(MKL_BLAS95      mkl_blas95_${MKL_INTERFACE})
+  set(MKL_LAPACK95    mkl_lapack95_${MKL_INTERFACE})
+endif()
+# BLACS
+set(MKL_BLACS mkl_blacs_${MKL_MPI}_${MKL_INTERFACE})
+if(UNIX AND NOT APPLE AND MKL_MPI MATCHES "mpich")
+  # MPICH is compatible with INTELMPI Wrappers on Linux
+  set(MKL_BLACS mkl_blacs_intelmpi_${MKL_INTERFACE})
+endif()
+if(WIN32)
+  if(MKL_MPI STREQUAL "msmpi")
+    if("Fortran" IN_LIST CURR_LANGS)
+      list(APPEND MKL_SUPP_LINK "msmpifec.lib")
+    endif()
+    # MSMPI and MSHPC are supported with the same BLACS library
+    set(MKL_BLACS mkl_blacs_msmpi_${MKL_INTERFACE})
+    if(NOT MKL_LINK STREQUAL "static")
+      set(MKL_BLACS mkl_blacs_${MKL_INTERFACE})
+      set(MKL_BLACS_ENV MSMPI)
+    endif()
+  elseif(MKL_MPI STREQUAL "intelmpi" AND NOT MKL_LINK STREQUAL "static")
+    set(MKL_BLACS mkl_blacs_${MKL_INTERFACE})
+    set(MKL_BLACS_ENV INTELMPI)
+  endif()
+endif()
+# CDFT & SCALAPACK
+set(MKL_CDFT      mkl_cdft_core)
+set(MKL_SCALAPACK mkl_scalapack_${MKL_INTERFACE})
+
+
+if (UNIX)
+  if(NOT APPLE)
+    if(MKL_LINK STREQUAL "static")
+      set(START_GROUP "-Wl,--start-group")
+      set(END_GROUP "-Wl,--end-group")
+      if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD)
+        set(EXPORT_DYNAMIC "-Wl,-export-dynamic")
+      endif()
+    elseif(MKL_LINK STREQUAL "dynamic")
+      set(MKL_RPATH "-Wl,-rpath=$<TARGET_FILE_DIR:MKL::${MKL_CORE}>")
+      if((GNU_Fortran_COMPILER OR PGI_COMPILER) AND "Fortran" IN_LIST CURR_LANGS)
+        set(NO_AS_NEEDED -Wl,--no-as-needed)
+      endif()
+    else()
+      set(MKL_RPATH "-Wl,-rpath=$<TARGET_FILE_DIR:MKL::${MKL_SDL}>")
+    endif()
+  endif()
+endif()
+
+# Create a list of requested libraries, based on input options (MKL_LIBRARIES)
+# Create full link-line in MKL_LINK_LINE
+list(APPEND MKL_LINK_LINE $<IF:$<BOOL:${ENABLE_OMP_OFFLOAD}>,${MKL_OFFLOAD_LOPT},>
+    $<IF:$<BOOL:${DPCPP_COMPILER}>,${MKL_DPCPP_LOPT},> ${EXPORT_DYNAMIC} ${NO_AS_NEEDED} ${MKL_RPATH})
+if(ENABLE_BLAS95)
+  list(APPEND MKL_LIBRARIES ${MKL_BLAS95})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_BLAS95})
+endif()
+if(ENABLE_LAPACK95)
+  list(APPEND MKL_LIBRARIES ${MKL_LAPACK95})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_LAPACK95})
+endif()
+if(ENABLE_SCALAPACK)
+  list(APPEND MKL_LIBRARIES ${MKL_SCALAPACK})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_SCALAPACK})
+endif()
+if(DPCPP_COMPILER OR (ENABLE_OMP_OFFLOAD AND NOT MKL_LINK STREQUAL "sdl"))
+  list(APPEND MKL_LIBRARIES ${MKL_SYCL})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_SYCL})
+endif()
+list(APPEND MKL_LINK_LINE ${START_GROUP})
+if(ENABLE_CDFT)
+  list(APPEND MKL_LIBRARIES ${MKL_CDFT})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_CDFT})
+endif()
+if(MKL_LINK STREQUAL "sdl")
+  list(APPEND MKL_LIBRARIES ${MKL_SDL})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_SDL})
+else()
+  list(APPEND MKL_LIBRARIES ${MKL_IFACE_LIB} ${MKL_THREAD} ${MKL_CORE})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_IFACE_LIB} MKL::${MKL_THREAD} MKL::${MKL_CORE})
+endif()
+if(USE_MPI)
+  list(APPEND MKL_LIBRARIES ${MKL_BLACS})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_BLACS})
+endif()
+list(APPEND MKL_LINK_LINE ${END_GROUP})
+
+# Find all requested libraries
+foreach(lib ${MKL_LIBRARIES})
+  unset(${lib}_file CACHE)
+  if(MKL_LINK STREQUAL "static" AND NOT ${lib} STREQUAL ${MKL_SDL})
+    find_library(${lib}_file ${LIB_PREFIX}${lib}${LIB_EXT}
+                  PATHS ${MKL_ROOT}
+                  PATH_SUFFIXES "lib" "lib/${MKL_ARCH}")
+    add_library(MKL::${lib} STATIC IMPORTED)
+  else()
+    find_library(${lib}_file NAMES ${LIB_PREFIX}${lib}${DLL_EXT} ${lib}
+                  PATHS ${MKL_ROOT}
+                  PATH_SUFFIXES "lib" "lib/${MKL_ARCH}")
+    add_library(MKL::${lib} SHARED IMPORTED)
+  endif()
+  find_package_handle_standard_args(MKL REQUIRED_VARS ${lib}_file)
+  # CMP0111, implemented in CMake 3.20+ requires a shared library target on Windows
+  # to be defined with IMPLIB and LOCATION property.
+  # It also requires a static library target to be defined with LOCATION property.
+  # Setting the policy to OLD usage, using cmake_policy() does not work as of 3.20.0, hence the if-else below.
+  if(WIN32 AND NOT MKL_LINK STREQUAL "static")
+    set_target_properties(MKL::${lib} PROPERTIES IMPORTED_IMPLIB "${${lib}_file}")
+    # Find corresponding DLL; each location must end in the glob pattern, a bare directory would match itself
+    set(MKL_DLL_GLOB ${lib}.*.dll)
+    file(GLOB MKL_DLL_FILE "${MKL_ROOT}/redist/${MKL_ARCH}/${MKL_DLL_GLOB}"
+        "${MKL_ROOT}/../redist/${MKL_ARCH}/${MKL_DLL_GLOB}"
+        "${MKL_ROOT}/../redist/${MKL_ARCH}/mkl/${MKL_DLL_GLOB}"
+        "${MKL_ROOT}/bin/${MKL_DLL_GLOB}")
+    if(NOT ${lib} STREQUAL ${MKL_IFACE_LIB} AND NOT ${lib} STREQUAL ${MKL_BLAS95} AND NOT ${lib} STREQUAL ${MKL_LAPACK95})  # Windows IFACE libs are static only
+      list(LENGTH MKL_DLL_FILE MKL_DLL_FILE_LEN)
+      if(MKL_DLL_FILE_LEN)
+        # in case multiple versions of the same dll are found, select the highest version
+        list(SORT MKL_DLL_FILE)
+        list(REVERSE MKL_DLL_FILE)
+        list(GET MKL_DLL_FILE 0 MKL_DLL_FILE)
+
+        mkl_message(STATUS "Found DLL: ${MKL_DLL_FILE}")
+        set_target_properties(MKL::${lib} PROPERTIES IMPORTED_LOCATION "${MKL_DLL_FILE}")
+      else()
+        mkl_message(FATAL_ERROR "${MKL_DLL_GLOB} not found")
+      endif()
+    endif()
+  else()
+    set_target_properties(MKL::${lib} PROPERTIES IMPORTED_LOCATION "${${lib}_file}")
+  endif()
+  list(APPEND MKL_IMPORTED_TARGETS MKL::${lib})
+endforeach()
+
+# Threading selection
+if(MKL_THREADING)
+  if(MKL_THREADING STREQUAL "tbb_thread")
+    find_package(TBB REQUIRED CONFIG COMPONENTS tbb)
+    set(MKL_THREAD_LIB $<TARGET_LINKER_FILE:TBB::tbb>)
+    set(MKL_SDL_THREAD_ENV "TBB")
+    get_property(TBB_LIB TARGET TBB::tbb PROPERTY IMPORTED_LOCATION_RELEASE)
+    get_filename_component(TBB_LIB_DIR ${TBB_LIB} DIRECTORY)
+    if(UNIX)
+      if(CMAKE_SKIP_BUILD_RPATH)
+        set(TBB_LINK "-L${TBB_LIB_DIR} -ltbb")
+      else()
+        set(TBB_LINK "-Wl,-rpath,${TBB_LIB_DIR} -L${TBB_LIB_DIR} -ltbb")
+      endif()
+      list(APPEND MKL_SUPP_LINK ${TBB_LINK})
+      if(APPLE)
+        list(APPEND MKL_SUPP_LINK -lc++)
+      else()
+        list(APPEND MKL_SUPP_LINK -lstdc++)
+      endif()
+    endif()
+    if(WIN32 OR APPLE)
+      set(MKL_ENV_PATH ${TBB_LIB_DIR})
+    endif()
+  elseif(MKL_THREADING MATCHES "_thread")
+    if(MKL_THREADING STREQUAL "pgi_thread")
+      list(APPEND MKL_SUPP_LINK -mp -pgf90libs)
+      set(MKL_SDL_THREAD_ENV "PGI")
+    elseif(MKL_THREADING STREQUAL "gnu_thread")
+      list(APPEND MKL_SUPP_LINK -lgomp)
+      set(MKL_SDL_THREAD_ENV "GNU")
+    else()
+      # intel_thread
+      if(UNIX)
+        set(MKL_OMP_LIB iomp5)
+        set(LIB_EXT ".so")
+        if(APPLE)
+          set(LIB_EXT ".dylib")
+        endif()
+      else()
+        set(MKL_OMP_LIB libiomp5md)
+      endif()
+      set(MKL_SDL_THREAD_ENV "INTEL")
+      set(OMP_LIBNAME ${LIB_PREFIX}${MKL_OMP_LIB}${LIB_EXT})
+
+      find_library(OMP_LIBRARY ${OMP_LIBNAME}
+        HINTS $ENV{LIB} $ENV{LIBRARY_PATH} $ENV{MKLROOT} ${MKL_ROOT} ${CMPLR_ROOT}
+        PATH_SUFFIXES "lib" "lib/${MKL_ARCH}"
+               "lib/${MKL_ARCH}_lin" "lib/${MKL_ARCH}_win"
+               "linux/compiler/lib/${MKL_ARCH}"
+               "linux/compiler/lib/${MKL_ARCH}_lin"
+               "windows/compiler/lib/${MKL_ARCH}"
+               "windows/compiler/lib/${MKL_ARCH}_win"
+               "../compiler/lib/${MKL_ARCH}_lin" "../compiler/lib/${MKL_ARCH}_win"
+               "../compiler/lib/${MKL_ARCH}" "../compiler/lib"
+               "../../compiler/latest/linux/compiler/lib/${MKL_ARCH}"
+               "../../compiler/latest/linux/compiler/lib/${MKL_ARCH}_lin"
+               "../../compiler/latest/windows/compiler/lib/${MKL_ARCH}"
+               "../../compiler/latest/windows/compiler/lib/${MKL_ARCH}_win"
+               "../../compiler/latest/mac/compiler/lib")
+      if(WIN32)
+        set(OMP_DLLNAME ${LIB_PREFIX}${MKL_OMP_LIB}.dll)
+        find_path(OMP_DLL_DIR ${OMP_DLLNAME}
+          HINTS $ENV{LIB} $ENV{LIBRARY_PATH} $ENV{MKLROOT} ${MKL_ROOT} ${CMPLR_ROOT}
+          PATH_SUFFIXES "redist/${MKL_ARCH}"
+               "redist/${MKL_ARCH}_win" "redist/${MKL_ARCH}_win/compiler"
+               "../redist/${MKL_ARCH}/compiler" "../compiler/lib"
+               "../../compiler/latest/windows/redist/${MKL_ARCH}_win"
+               "../../compiler/latest/windows/redist/${MKL_ARCH}_win/compiler"
+               "../../compiler/latest/windows/compiler/redist/${MKL_ARCH}_win"
+               "../../compiler/latest/windows/compiler/redist/${MKL_ARCH}_win/compiler")
+        find_package_handle_standard_args(MKL REQUIRED_VARS OMP_DLL_DIR)
+        set(MKL_ENV_PATH "${OMP_DLL_DIR}")
+      endif()
+
+      if(WIN32 AND SKIP_LIBPATH)
+        # Only for Intel OpenMP Offload
+        set(OMP_LINK "libiomp5md.lib")
+      else()
+        set(OMP_LINK "${OMP_LIBRARY}")
+        if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
+          # Disable PGI OpenMP runtime for correct work of Intel OpenMP runtime
+          list(APPEND MKL_SUPP_LINK -nomp)
+        endif()
+      endif()
+      find_package_handle_standard_args(MKL REQUIRED_VARS OMP_LIBRARY OMP_LINK)
+      set(MKL_THREAD_LIB ${OMP_LINK})
+    endif()
+  else()
+    # Sequential threading
+    set(MKL_SDL_THREAD_ENV "SEQUENTIAL")
+  endif()
+endif() # MKL_THREADING
+
+if (UNIX)
+  list(APPEND MKL_SUPP_LINK -lm -ldl -lpthread)
+endif()
+
+if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD)
+  if(WIN32)
+    # Detect sycl library version
+    if(NOT DEFINED SYCL_LIB_VER_CACHE)
+      set(SYCL_LIB_VER "")
+      find_library(SYCL_LIB_DIR ${LIB_PREFIX}sycl${LIB_EXT}
+        HINTS $ENV{LIB} $ENV{CMPLR_ROOT}
+        PATH_SUFFIXES "windows/lib")
+      if(NOT SYCL_LIB_DIR)
+        foreach(ver RANGE 6 99)
+          find_library(SYCL_LIB_DIR ${LIB_PREFIX}sycl${ver}${LIB_EXT}
+            HINTS $ENV{LIB} $ENV{CMPLR_ROOT}
+            PATH_SUFFIXES "windows/lib")
+          if(SYCL_LIB_DIR)
+            set(SYCL_LIB_VER ${ver})
+            break()
+          endif()
+        endforeach()
+      endif()
+      set(SYCL_LIB_VER_CACHE ${SYCL_LIB_VER} CACHE STRING "")
+    endif()
+
+    if(CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo")
+      list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${SYCL_LIB_VER_CACHE}d${LINK_SUFFIX})
+    else()
+      list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${SYCL_LIB_VER_CACHE}${LINK_SUFFIX})
+    endif()
+  else()
+    list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${LINK_SUFFIX})
+  endif()
+  list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}OpenCL${LINK_SUFFIX})
+endif()
+
+# Setup link types based on input options
+set(LINK_TYPES "")
+
+if(DPCPP_COMPILER)
+  add_library(MKL::MKL_DPCPP INTERFACE IMPORTED GLOBAL)
+  target_compile_options(MKL::MKL_DPCPP INTERFACE ${MKL_DPCPP_COPT})
+  target_link_libraries(MKL::MKL_DPCPP INTERFACE ${MKL_LINK_LINE} ${MKL_THREAD_LIB} ${MKL_SUPP_LINK})
+  list(APPEND LINK_TYPES MKL::MKL_DPCPP)
+endif()
+# Single target for all C, Fortran link-lines
+add_library(MKL::MKL INTERFACE IMPORTED GLOBAL)
+target_compile_options(MKL::MKL INTERFACE
+    $<$<STREQUAL:$<TARGET_PROPERTY:LINKER_LANGUAGE>,C>:${MKL_C_COPT}>
+    $<$<STREQUAL:$<TARGET_PROPERTY:LINKER_LANGUAGE>,Fortran>:${MKL_F_COPT}>
+    $<$<STREQUAL:$<TARGET_PROPERTY:LINKER_LANGUAGE>,CXX>:${MKL_CXX_COPT}>
+    $<IF:$<BOOL:${ENABLE_OMP_OFFLOAD}>,${MKL_OFFLOAD_COPT},>)
+target_link_libraries(MKL::MKL INTERFACE ${MKL_LINK_LINE} ${MKL_THREAD_LIB} ${MKL_SUPP_LINK})
+list(APPEND LINK_TYPES MKL::MKL)
+
+foreach(link ${LINK_TYPES})
+  # Set properties on all INTERFACE targets
+  target_include_directories(${link} BEFORE INTERFACE "${MKL_INCLUDE}")
+  list(APPEND MKL_IMPORTED_TARGETS ${link})
+endforeach(link) # LINK_TYPES
+
+if(MKL_LINK STREQUAL "sdl")
+  list(APPEND MKL_ENV "MKL_INTERFACE_LAYER=${MKL_SDL_IFACE_ENV}" "MKL_THREADING_LAYER=${MKL_SDL_THREAD_ENV}")
+endif()
+if(WIN32 AND NOT MKL_LINK STREQUAL "static")
+  list(APPEND MKL_ENV "MKL_BLACS_MPI=${MKL_BLACS_ENV}")
+endif()
+
+# Add MKL dynamic libraries if RPATH is not defined on Unix
+if(UNIX AND CMAKE_SKIP_BUILD_RPATH)
+  if(MKL_LINK STREQUAL "sdl")
+    set(MKL_LIB_DIR $<TARGET_FILE_DIR:MKL::${MKL_SDL}>)
+  else()
+    set(MKL_LIB_DIR $<TARGET_FILE_DIR:MKL::${MKL_CORE}>)
+  endif()
+  if(APPLE)
+    list(APPEND MKL_ENV "DYLD_LIBRARY_PATH=${MKL_LIB_DIR}\;$ENV{DYLD_LIBRARY_PATH}")
+  else()
+    list(APPEND MKL_ENV "LD_LIBRARY_PATH=${MKL_LIB_DIR}\;$ENV{LD_LIBRARY_PATH}")
+  endif()
+endif()
+
+# Add MKL dynamic libraries to PATH on Windows
+if(WIN32 AND NOT MKL_LINK STREQUAL "static")
+  get_filename_component(MKL_DLL_DIR ${MKL_DLL_FILE} DIRECTORY)
+  set(MKL_ENV_PATH "${MKL_DLL_DIR}\;${MKL_ENV_PATH}")
+endif()
+
+if(MKL_ENV_PATH)
+  list(APPEND MKL_ENV "PATH=${MKL_ENV_PATH}\;${OLD_PATH}")
+  if(APPLE)
+    list(APPEND MKL_ENV "DYLD_LIBRARY_PATH=${MKL_ENV_PATH}\:${OLD_PATH}")
+  endif()
+endif()
+
+unset(MKL_DLL_FILE)
+
+endif() # MKL_LIBRARIES

From 3330e9c07aeab0488182305207586878e6c8932c Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 24 Mar 2023 14:28:11 -0500
Subject: [PATCH 034/129] Completed support for DPNP_GENERATE_COVERAGE

---
 CMakeLists.txt              | 14 ++++++++++++++
 dpnp/CMakeLists.txt         |  1 -
 dpnp/backend/CMakeLists.txt |  4 ++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 581b903b6cc7..5b2ee2b7a22a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -123,4 +123,18 @@ else()
     message(FATAL_ERROR "Unsupported system.")
 endif()
 
+if (DPNP_GENERATE_COVERAGE)
+    string(CONCAT PROFILE_FLAGS
+        "-fprofile-instr-generate "
+        "-fcoverage-mapping "
+        "-fno-sycl-use-footer "
+#        "-save-temps=obj "
+    )
+
+    # Add profiling flags
+    set(CMAKE_CXX_FLAGS
+        "${CMAKE_CXX_FLAGS} ${PROFILE_FLAGS}"
+    )
+endif()
+
 add_subdirectory(dpnp)
diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
index 7d3936da2d4b..6262fb7150e9 100644
--- a/dpnp/CMakeLists.txt
+++ b/dpnp/CMakeLists.txt
@@ -8,7 +8,6 @@ function(build_dpnp_cython_ext _trgt _src _dest)
   add_dependencies(${_trgt} ${_trgt_deps})
   if (DPNP_GENERATE_COVERAGE)
     target_compile_definitions(${_trgt} PRIVATE CYTHON_TRACE=1 CYTHON_TRACE_NOGIL=1)
-    target_compile_options(${_trgt} PRIVATE -fno-sycl-use-footer)
   endif()
   # NumPy
   target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR})
diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 55263d6ea2e0..f595712ceef9 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -64,6 +64,10 @@ if(UNIX)
     target_link_options(${_trgt} PUBLIC -fsycl-link-huge-device-code)
 endif()
 
+if(DPNP_GENERATE_COVERAGE)
+    target_link_options(${_trgt} PRIVATE -fprofile-instr-generate -fcoverage-mapping)
+endif()
+
 target_link_libraries(${_trgt} PUBLIC MKL::MKL_DPCPP)
 target_link_libraries(${_trgt} PUBLIC oneDPL)
 

From 83fa0493332dd970ef42c3a6d5301816087c9eca Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 24 Mar 2023 16:58:15 -0500
Subject: [PATCH 035/129] Use PREFIX rather than CONDA_PREFIX in build scripts

---
 conda-recipe/bld.bat  |  6 +++---
 conda-recipe/build.sh | 10 ++++++----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat
index 17bff70138dc..b603abddb195 100644
--- a/conda-recipe/bld.bat
+++ b/conda-recipe/bld.bat
@@ -11,9 +11,9 @@ SET "SETUPTOOLS_USE_DISTUTILS=stdlib"
 
 "%PYTHON%" setup.py clean --all
 
-set "MKL_ROOT_HINT=%CONDA_PREFIX%/Library"
-set "TBB_ROOT_HINT=%CONDA_PREFIX%/Library"
-set "DPL_ROOT_HINT=%CONDA_PREFIX%/Library"
+set "MKLROOT=%PREFIX%/Library"
+set "TBB_ROOT_HINT=%PREFIX%/Library"
+set "DPL_ROOT_HINT=%PREFIX%/Library"
 
 %PYTHON% -m dpctl --cmakedir > Output
 set /p DPCTL_CMAKE_DIR= < Output
diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh
index ff97b397a1e9..8d832e5cb96c 100644
--- a/conda-recipe/build.sh
+++ b/conda-recipe/build.sh
@@ -6,10 +6,12 @@ export ICPXCFG="$(pwd)/icpx_for_conda.cfg"
 export ICXCFG="$(pwd)/icpx_for_conda.cfg"
 
 export CMAKE_GENERATOR="Ninja"
-export TBB_ROOT_HINT=$CONDA_PREFIX
-export DPL_ROOT_HINT=$CONDA_PREFIX
-export MKL_ROOT_HINT=$CONDA_PREFIX
-SKBUILD_ARGS="-- -DDPCTL_MODULE_PATH=$($PYTHON -m dpctl --cmakedir) -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
+export TBB_ROOT_HINT=$PREFIX
+export DPL_ROOT_HINT=$PREFIX
+export MKL_ROOT_HINT=$PREFIX
+SKBUILD_ARGS="-- -DDPCTL_MODULE_PATH=$($PYTHON -m dpctl --cmakedir) "
+SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx"
+SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
 
 # Build wheel package
 if [ "$CONDA_PY" == "36" ]; then

From 3f9773b018a5c1eb03402d97da92cfe346e695b5 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 24 Mar 2023 16:58:45 -0500
Subject: [PATCH 036/129] Fixed cmake scripts to work on Windows with conda
 build

---
 dpnp/backend/cmake/Modules/MKLConfig.cmake |   4 +-
 dpnp/backend/cmake/Modules/TBBConfig.cmake | 100 +++++++++++++++++++--
 2 files changed, 93 insertions(+), 11 deletions(-)

diff --git a/dpnp/backend/cmake/Modules/MKLConfig.cmake b/dpnp/backend/cmake/Modules/MKLConfig.cmake
index 38b3e72fb443..07290e8ea331 100644
--- a/dpnp/backend/cmake/Modules/MKLConfig.cmake
+++ b/dpnp/backend/cmake/Modules/MKLConfig.cmake
@@ -635,7 +635,7 @@ foreach(lib ${MKL_LIBRARIES})
     file(GLOB MKL_DLL_FILE "${MKL_ROOT}/redist/${MKL_ARCH}/${MKL_DLL_GLOB}"
         "${MKL_ROOT}/../redist/${MKL_ARCH}/${MKL_DLL_GLOB}"
         "${MKL_ROOT}/../redist/${MKL_ARCH}/mkl/${MKL_DLL_GLOB}"
-	"${MKL_ROOT}/bin")
+	"${MKL_ROOT}/bin/${MKL_DLL_GLOB}")
     if(NOT ${lib} STREQUAL ${MKL_IFACE_LIB} AND NOT ${lib} STREQUAL ${MKL_BLAS95} AND NOT ${lib} STREQUAL ${MKL_LAPACK95})  # Windows IFACE libs are static only
       list(LENGTH MKL_DLL_FILE MKL_DLL_FILE_LEN)
       if(MKL_DLL_FILE_LEN)
@@ -647,7 +647,7 @@ foreach(lib ${MKL_LIBRARIES})
         mkl_message(STATUS "Found DLL: ${MKL_DLL_FILE}")
         set_target_properties(MKL::${lib} PROPERTIES IMPORTED_LOCATION "${MKL_DLL_FILE}")
       else()
-        mkl_message(FATAL_ERROR "${MKL_DLL_GLOB} not found")
+        mkl_message(FATAL_ERROR "${MKL_DLL_GLOB} not found. MKL_ROOT was '${MKL_ROOT}'. MKL_DLL_FILE is '${MKL_DLL_FILE}'")
       endif()
     endif()
   else()
diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake
index 6eeaba70d1eb..0233d9307cb3 100644
--- a/dpnp/backend/cmake/Modules/TBBConfig.cmake
+++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake
@@ -55,19 +55,74 @@ else ()
     set(_tbb_subdir ia32/gcc4.8)
 endif()
 
+if (UNIX)
+  set(_tbb_lib_ext ".so")
+  set(_tbb_lib_prefix "lib")
+  set(_tbb_lib_dir_conda "lib")
+  set(_bin_version "")
+elseif (WIN32) 
+  set(_bin_version "")
+  set(_tbb_lib_prefix "")
+  set(_tbb_lib_ext ".dll")
+  set(_tbb_impllib_ext ".lib")
+  set(_tbb_lib_dir_conda "bin")
+  set(_tbb_impllib_dir_conda "lib")
+else()
+    message(FATAL_ERROR "Unsupported platform. Only Unix and Windows are supported.")
+endif()
+
 foreach (_tbb_component ${TBB_FIND_COMPONENTS})
     set(TBB_${_tbb_component}_FOUND 0)
 
-    find_library(_tbb_release_lib NAMES ${_tbb_component}${_bin_version} lib${_tbb_component}${_bin_version}.so.${_${_tbb_component}_bin_version}
-                  PATHS ${_tbb_root}
-                  HINTS ENV TBB_ROOT_HINT
-                  PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
+if(WIN32)
+    unset(_bin_version)
+    if (_tbb_component STREQUAL tbb)
+        set(_bin_version ${_tbb_bin_version})
+    endif()
+endif()
+
+    message(STATUS "Looking for ")
+    message(STATUS "  NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext}")
+    message(STATUS "  PATHS ${_tbb_root}")
+    message(STATUS "  HINTS $ENV{TBB_ROOT_HINT}")
+    message(STATUS "  PATH_SUFFIXES ${_tbb_lib_dir_conda}  lib/${_tbb_subdir}")
+    if(UNIX)
+       find_library(_tbb_release_lib 
+                    NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext}
+                    PATHS ${_tbb_root}
+                    HINTS ENV TBB_ROOT_HINT
+                    PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}")
+
+    else()
+
+       find_file(_tbb_release_lib 
+                 NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext}
+                 PATHS ${_tbb_root}
+                 HINTS ENV TBB_ROOT_HINT
+                 PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}")
+
+       if (EXISTS "${_tbb_release_lib}")
+          find_library(_tbb_release_impllib 
+                       NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_impllib_ext}
+                       PATHS ${_tbb_root}
+                       HINTS ENV TBB_ROOT_HINT
+                       PATH_SUFFIXES "${_tbb_impllib_dir_conda}" "lib/${_tbb_subdir}")
+       endif()
+    endif()
 
     if (NOT TBB_FIND_RELEASE_ONLY)
-        find_library(_tbb_debug_lib ${_tbb_component}${_bin_version}_debug lib${_tbb_component}${_bin_version}_debug.so.${_${_tbb_component}_bin_version}
+        find_library(_tbb_debug_lib 
+                     NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}_debug.${_tbb_lib_ext}
                      PATHS ${_tbb_root}
                      HINTS ENV TBB_ROOT_HINT
-                     PATH_SUFFIXES "lib" "lib/${_tbb_subdir}")
+                     PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}")
+        if(WIN32  AND EXISTS "${_tbb_debug_lib}")
+           find_library(_tbb_debug_impllib 
+                        NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}_debug.${_tbb_impllib_ext}
+                        PATHS ${_tbb_root}
+                        HINTS ENV TBB_ROOT_HINT
+                        PATH_SUFFIXES "${_tbb_impllib_dir_conda}" "lib/${_tbb_subdir}")
+        endif()
     endif()
 
     if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}")
@@ -80,22 +135,48 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS})
 	      PATH_SUFFIXES include
 	      HINTS ENV TBB_ROOT_HINT
 	      )
-	    
-            set_target_properties(TBB::${_tbb_component} PROPERTIES
-                                  INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}")
+
+if(WIN32)
+            set_target_properties(
+                TBB::${_tbb_component} PROPERTIES
+                INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}"
+                INTERFACE_COMPILE_DEFINITIONS "__TBB_NO_IMPLICIT_LINKAGE=1"
+                )
+else()
+            set_target_properties(
+                TBB::${_tbb_component} PROPERTIES
+                INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}"
+                )
+endif()
+            message(STATUS "Set ${_tbb_include_dir} for component ${_tbb_component}")
             unset(_tbb_current_realpath)
             unset(_tbb_include_dir)
 
             if (EXISTS "${_tbb_release_lib}")
+if(WIN32)
+                set_target_properties(TBB::${_tbb_component} PROPERTIES
+                                      IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}"
+                                      IMPORTED_IMPLIB_RELEASE "${_tbb_release_impllib}")
+else()
                 set_target_properties(TBB::${_tbb_component} PROPERTIES
                                       IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}")
+endif()
                 set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
+                message(STATUS "Set IMPORTED_LOCATION_RELEASE ${_tbb_release_lib} for component ${_tbb_component}")
             endif()
 
             if (EXISTS "${_tbb_debug_lib}")
+if(WIN32)
+                set_target_properties(TBB::${_tbb_component} PROPERTIES
+                                      IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}"
+                                      IMPORTED_IMPLIB_DEBUG "${_tbb_debug_impllib}"
+                )
+else()
                 set_target_properties(TBB::${_tbb_component} PROPERTIES
                                       IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}")
+endif()
                 set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
+                message(STATUS "Set IMPORTED_LOCATION_DEBUG ${_tbb_release_lib} for component ${_tbb_component}")
             endif()
 
             # Add internal dependencies for imported targets: TBB::tbbmalloc_proxy -> TBB::tbbmalloc
@@ -112,6 +193,7 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS})
         else()
             message(STATUS "  one or both of:\n   ${_tbb_release_lib}\n    ${_tbb_debug_lib}\n   files must exist.")
         endif()
+        message(STATUS "Value of TBB_ROOT_HINT = '$ENV{TBB_ROOT_HINT}'")
         set(TBB_FOUND FALSE)
     endif()
 endforeach()

From 1dd0589bb2387913b3caf0d9368ad21b24fe7511 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 24 Mar 2023 19:13:23 -0500
Subject: [PATCH 037/129] Removed debugging message(STATUS msg_txt)

---
 dpnp/backend/cmake/Modules/TBBConfig.cmake | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake
index 0233d9307cb3..6a3f4f7a43a6 100644
--- a/dpnp/backend/cmake/Modules/TBBConfig.cmake
+++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake
@@ -81,11 +81,6 @@ if(WIN32)
     endif()
 endif()
 
-    message(STATUS "Looking for ")
-    message(STATUS "  NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext}")
-    message(STATUS "  PATHS ${_tbb_root}")
-    message(STATUS "  HINTS $ENV{TBB_ROOT_HINT}")
-    message(STATUS "  PATH_SUFFIXES ${_tbb_lib_dir_conda}  lib/${_tbb_subdir}")
     if(UNIX)
        find_library(_tbb_release_lib 
                     NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext}
@@ -94,7 +89,6 @@ endif()
                     PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}")
 
     else()
-
        find_file(_tbb_release_lib 
                  NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext}
                  PATHS ${_tbb_root}
@@ -148,7 +142,6 @@ else()
                 INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}"
                 )
 endif()
-            message(STATUS "Set ${_tbb_include_dir} for component ${_tbb_component}")
             unset(_tbb_current_realpath)
             unset(_tbb_include_dir)
 
@@ -162,7 +155,6 @@ else()
                                       IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}")
 endif()
                 set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
-                message(STATUS "Set IMPORTED_LOCATION_RELEASE ${_tbb_release_lib} for component ${_tbb_component}")
             endif()
 
             if (EXISTS "${_tbb_debug_lib}")
@@ -176,7 +168,6 @@ else()
                                       IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}")
 endif()
                 set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
-                message(STATUS "Set IMPORTED_LOCATION_DEBUG ${_tbb_release_lib} for component ${_tbb_component}")
             endif()
 
             # Add internal dependencies for imported targets: TBB::tbbmalloc_proxy -> TBB::tbbmalloc
@@ -193,7 +184,6 @@ endif()
         else()
             message(STATUS "  one or both of:\n   ${_tbb_release_lib}\n    ${_tbb_debug_lib}\n   files must exist.")
         endif()
-        message(STATUS "Value of TBB_ROOT_HINT = '$ENV{TBB_ROOT_HINT}'")
         set(TBB_FOUND FALSE)
     endif()
 endforeach()

From bab5fdf802af203c1ccff5c712eef10aeb7bd484 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 24 Mar 2023 19:14:04 -0500
Subject: [PATCH 038/129] Use no-approx-func and no-finite-math-only to build
 backend

---
 dpnp/backend/CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index f595712ceef9..515072c2dad1 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -58,6 +58,12 @@ set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON)
 target_include_directories(${_trgt} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
 target_include_directories(${_trgt} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
 
+target_compile_options(${_trgt} PUBLIC
+  -fno-approx-func
+  -fno-finite-math-only
+)
+
+
 target_link_options(${_trgt} PUBLIC -fsycl-device-code-split=per_kernel)
 if(UNIX)
     # this option is support on Linux only

From cc32075087e52ffbbce7b9f4a7e39f7485122fc0 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Sun, 26 Mar 2023 09:09:12 -0500
Subject: [PATCH 039/129] Request TBB backend for oneDPL

---
 CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5b2ee2b7a22a..088fd6ee1ca1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,6 +36,7 @@ set(MKL_INTERFACE_FULL "intel_ilp64")
 set(MKL_THREADING "tbb_thread")
 find_package(MKL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH)
 
+set(ONEDPL_PAR_BACKEND tbb)
 find_package(oneDPL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH)
 
 include(GNUInstallDirs)

From e23ecd73d5a8cc8c89c5e1c488444078e5e1dba8 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Sun, 26 Mar 2023 09:09:52 -0500
Subject: [PATCH 040/129] Vendored oneDPLConfig.cmake should ensure calling
 vendored TBBConfig.cmake

---
 dpnp/backend/cmake/Modules/oneDPLConfig.cmake | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
index 833c298be84e..6473d20c69f5 100755
--- a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
+++ b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
@@ -45,7 +45,9 @@ if (EXISTS "${_onedpl_headers}")
         endif()
 
         if (NOT ONEDPL_PAR_BACKEND OR ONEDPL_PAR_BACKEND STREQUAL "tbb")  # Handle oneTBB backend
-            find_package(TBB 2021 QUIET COMPONENTS tbb)
+            if (NOT TBB_FOUND)
+               find_package(TBB 2021 QUIET COMPONENTS tbb PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH)
+            endif()
             if (NOT TBB_FOUND AND ONEDPL_PAR_BACKEND STREQUAL "tbb")  # If oneTBB backend is requested explicitly, but not found.
                 message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND} requested, but not found")
                 set(oneDPL_FOUND FALSE)

From b14555e14e575552f6a18fec2ac353db061eb266 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Sun, 26 Mar 2023 09:11:07 -0500
Subject: [PATCH 041/129] Ensure library is installed in dpnp on Win

Also work around an issue with CMake 3.22 and DPC++ integration to
ensure per-kernel code splitting.
---
 dpnp/backend/CMakeLists.txt | 38 +++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 515072c2dad1..59d0392b1535 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -47,21 +47,27 @@ set(DPNP_SRC
     src/dpnp_random_state.cpp
     )
 
-find_package(MKL REQUIRED)
-find_package(oneDPL REQUIRED)
-
 set(_trgt "dpnp_backend_c")
 
+if(WIN32)
+    if (${CMAKE_VERSION} VERSION_LESS "3.23")
+        # this is a work-around for target_link_options inserting option after -link option, cause
+        # linker to ignore it.
+        set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel")
+    endif()
+endif()
+
 add_library(${_trgt} SHARED ${DPNP_SRC})
 set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON)
 
-target_include_directories(${_trgt} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(${_trgt} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
+target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
 
-target_compile_options(${_trgt} PUBLIC
+target_compile_options(${_trgt} PRIVATE
   -fno-approx-func
   -fno-finite-math-only
 )
+target_compile_definitions(${_trgt} PRIVATE _WIN=1)
 
 
 target_link_options(${_trgt} PUBLIC -fsycl-device-code-split=per_kernel)
@@ -95,16 +101,16 @@ if(DPNP_BACKEND_TESTS)
   add_subdirectory(tests)
 endif()
 
-
+set(_lib_destination dpnp)
+set(_include_destination dpnp/backend/include)
 install(
   TARGETS ${_trgt}
-  LIBRARY DESTINATION dpnp
-  PERMISSIONS
-  OWNER_WRITE
-  OWNER_READ
-  OWNER_EXECUTE
-  GROUP_READ
-  GROUP_EXECUTE
-  WORLD_READ
-  WORLD_EXECUTE
+  LIBRARY
+    DESTINATION ${_lib_destination}
+  ARCHIVE
+    DESTINATION ${_lib_destination}
+  RUNTIME
+    DESTINATION ${_lib_destination}
+  PUBLIC_HEADER
+    DESTINATION ${_include_destination}
   )

From 4cc94927f5b5bc516df2cde7975ad1a347b8378d Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Mon, 27 Mar 2023 13:07:52 -0500
Subject: [PATCH 042/129] Work around lack of support for -fno-approx-func in
 icx.exe

---
 dpnp/backend/CMakeLists.txt | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 59d0392b1535..0fd601ad420f 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -63,10 +63,18 @@ set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON)
 target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
 target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
 
-target_compile_options(${_trgt} PRIVATE
-  -fno-approx-func
-  -fno-finite-math-only
-)
+if (WIN32)
+  target_compile_options(${_trgt} PRIVATE
+    /clang:-fno-approx-func
+    /clang:-fno-finite-math-only
+    )
+else()
+  target_compile_options(${_trgt} PRIVATE
+    -fno-approx-func
+    -fno-finite-math-only
+    )
+endif()
+
 target_compile_definitions(${_trgt} PRIVATE _WIN=1)
 
 

From 52e5ee06fa109a7f7bb7bae157b067eb20758fab Mon Sep 17 00:00:00 2001
From: Evseniia Komarova <evseniia.komarova@intel.com>
Date: Tue, 28 Mar 2023 00:47:24 +0200
Subject: [PATCH 043/129] rename dpctl to dpnp in bld.bat for wheels copying

---
 conda-recipe/bld.bat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat
index b603abddb195..5793f9f5d5e7 100644
--- a/conda-recipe/bld.bat
+++ b/conda-recipe/bld.bat
@@ -46,7 +46,7 @@ if NOT "%WHEELS_OUTPUT_FOLDER%"=="" (
     rem Install and assemble wheel package from the build bits
     "%PYTHON%" setup.py install bdist_wheel %SKBUILD_ARGS%
     if errorlevel 1 exit 1
-    copy dist\dpctl*.whl %WHEELS_OUTPUT_FOLDER%
+    copy dist\dpnp*.whl %WHEELS_OUTPUT_FOLDER%
     if errorlevel 1 exit 1
 ) ELSE (
     rem Only install

From 3c870f6a9d81e3b432a6a1d3721cdb056359e296 Mon Sep 17 00:00:00 2001
From: Natalia Polina <natalia.polina@intel.com>
Date: Wed, 29 Mar 2023 17:00:20 -0500
Subject: [PATCH 044/129] Allow asarray to work on sequences of dpnp_array with
 support for compute-follows-data.

---
 dpnp/dpnp_container.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index 12d28074b8fb..3be26b3d9769 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -91,7 +91,6 @@ def asarray(x1,
     else:
         x1_obj = x1
 
-    sycl_queue_normalized = dpnp.get_normalized_queue_device(x1_obj, device=device, sycl_queue=sycl_queue)
     if order is None:
         order = 'C'
 
@@ -100,8 +99,9 @@ def asarray(x1,
                             dtype=dtype,
                             copy=copy,
                             order=order,
+                            device=device,
                             usm_type=usm_type,
-                            sycl_queue=sycl_queue_normalized)
+                            sycl_queue=sycl_queue)
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 

From 05683972b41b0efdde280276e290131a812a20e2 Mon Sep 17 00:00:00 2001
From: Natalia Polina <natalia.polina@intel.com>
Date: Wed, 29 Mar 2023 17:03:53 -0500
Subject: [PATCH 045/129] Added tests for asarray function.

---
 tests/test_sycl_queue.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index a523c46465bf..6994facf6ff9 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -945,3 +945,15 @@ def test_broadcast_to(device):
     x = dpnp.arange(5, device=device)
     y = dpnp.broadcast_to(x, (3, 5))
     assert_sycl_queue_equal(x.sycl_queue, y.sycl_queue)
+
+
+@pytest.mark.parametrize("device_x",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+@pytest.mark.parametrize("device_y",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_asarray(device_x, device_y):
+    x = dpnp.array([1, 2, 3], device=device_x)
+    y = dpnp.asarray([x], device=device_y)
+    assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)

From 530f94f31399d9c2645c632dd5065e92b4961dc7 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 31 Mar 2023 11:34:59 -0500
Subject: [PATCH 046/129] Only set GXX-specific defines on Unix

Commented out setting of the _WIN define, which does not seem to be used anywhere in dpnp/backend.
---
 dpnp/backend/CMakeLists.txt | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 0fd601ad420f..894d3b6e72d4 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -75,8 +75,9 @@ else()
     )
 endif()
 
-target_compile_definitions(${_trgt} PRIVATE _WIN=1)
-
+# if (WIN32)
+#   target_compile_definitions(${_trgt} PRIVATE _WIN=1)
+# endif()
 
 target_link_options(${_trgt} PUBLIC -fsycl-device-code-split=per_kernel)
 if(UNIX)
@@ -91,8 +92,11 @@ endif()
 target_link_libraries(${_trgt} PUBLIC MKL::MKL_DPCPP)
 target_link_libraries(${_trgt} PUBLIC oneDPL)
 
-# needed for STL headers with GCC < 11
-target_compile_definitions(${_trgt} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0)
+if (UNIX)
+  # needed for STL headers with GCC < 11
+  target_compile_definitions(${_trgt} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0)
+endif()
+
 target_compile_definitions(${_trgt} PUBLIC PSTL_USE_PARALLEL_POLICIES=0)
 # work-around for Windows at exit crash with predefined policies
 target_compile_definitions(${_trgt} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0)

From 01a11e42e65bbe34ee8a75e295c20ba5394b61d3 Mon Sep 17 00:00:00 2001
From: Natalia Polina <natalia.polina@intel.com>
Date: Fri, 31 Mar 2023 11:43:28 -0500
Subject: [PATCH 047/129] Restored sycl_queue check for dpnp.asarray()
 function.

---
 dpnp/dpnp_container.py | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index 3be26b3d9769..5bd6f460496f 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -86,22 +86,34 @@ def asarray(x1,
             usm_type=None,
             sycl_queue=None):
     """Converts `x1` to `dpnp_array`."""
-    if isinstance(x1, dpnp_array):
-        x1_obj = x1.get_array()
-    else:
-        x1_obj = x1
+    dpu.validate_usm_type(usm_type, allow_none=True)
 
     if order is None:
         order = 'C'
 
     """Converts incoming 'x1' object to 'dpnp_array'."""
-    array_obj = dpt.asarray(x1_obj,
-                            dtype=dtype,
-                            copy=copy,
-                            order=order,
-                            device=device,
-                            usm_type=usm_type,
-                            sycl_queue=sycl_queue)
+    if isinstance(x1, (list, tuple, range)):
+        array_obj = dpt.asarray(x1,
+                                dtype=dtype,
+                                copy=copy,
+                                order=order,
+                                device=device,
+                                usm_type=usm_type,
+                                sycl_queue=sycl_queue)
+    else:
+        if isinstance(x1, dpnp_array):
+            x1_obj = x1.get_array()
+        else:
+            x1_obj = x1
+
+        sycl_queue_normalized = dpnp.get_normalized_queue_device(x1_obj, device=device, sycl_queue=sycl_queue)
+
+        array_obj = dpt.asarray(x1_obj,
+                                dtype=dtype,
+                                copy=copy,
+                                order=order,
+                                usm_type=usm_type,
+                                sycl_queue=sycl_queue_normalized)
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 

From c7858afe140ed5e778e81de7875b7f3ec947146f Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Fri, 31 Mar 2023 20:03:33 +0200
Subject: [PATCH 048/129] Update tests/test_sycl_queue.py

---
 tests/test_sycl_queue.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 6994facf6ff9..ab974e426f93 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -955,5 +955,5 @@ def test_broadcast_to(device):
                          ids=[device.filter_string for device in valid_devices])
 def test_asarray(device_x, device_y):
     x = dpnp.array([1, 2, 3], device=device_x)
-    y = dpnp.asarray([x], device=device_y)
+    y = dpnp.asarray([x, x, x], device=device_y)
     assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)

From c88ded75432e261716fb048615d77ceef05633cf Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 31 Mar 2023 15:04:21 -0500
Subject: [PATCH 049/129] Renamed included dpnp_algo_*.pyx files to *.pxi

---
 dpnp/dpnp_algo/CMakeLists.txt                 | 26 +++++++++----------
 dpnp/dpnp_algo/dpnp_algo.pyx                  | 26 +++++++++----------
 ...eation.pyx => dpnp_algo_arraycreation.pxi} |  0
 ...algo_bitwise.pyx => dpnp_algo_bitwise.pxi} |  0
 ...go_counting.pyx => dpnp_algo_counting.pxi} |  0
 ...go_indexing.pyx => dpnp_algo_indexing.pxi} |  0
 ...lgebra.pyx => dpnp_algo_linearalgebra.pxi} |  0
 ...pnp_algo_logic.pyx => dpnp_algo_logic.pxi} |  0
 ...ulation.pyx => dpnp_algo_manipulation.pxi} |  0
 ...matical.pyx => dpnp_algo_mathematical.pxi} |  0
 ..._searching.pyx => dpnp_algo_searching.pxi} |  0
 ...algo_sorting.pyx => dpnp_algo_sorting.pxi} |  0
 ...algo_special.pyx => dpnp_algo_special.pxi} |  0
 ...tatistics.pyx => dpnp_algo_statistics.pxi} |  0
 ...metric.pyx => dpnp_algo_trigonometric.pxi} |  0
 15 files changed, 26 insertions(+), 26 deletions(-)
 rename dpnp/dpnp_algo/{dpnp_algo_arraycreation.pyx => dpnp_algo_arraycreation.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_bitwise.pyx => dpnp_algo_bitwise.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_counting.pyx => dpnp_algo_counting.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_indexing.pyx => dpnp_algo_indexing.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_linearalgebra.pyx => dpnp_algo_linearalgebra.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_logic.pyx => dpnp_algo_logic.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_manipulation.pyx => dpnp_algo_manipulation.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_mathematical.pyx => dpnp_algo_mathematical.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_searching.pyx => dpnp_algo_searching.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_sorting.pyx => dpnp_algo_sorting.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_special.pyx => dpnp_algo_special.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_statistics.pyx => dpnp_algo_statistics.pxi} (100%)
 rename dpnp/dpnp_algo/{dpnp_algo_trigonometric.pyx => dpnp_algo_trigonometric.pxi} (100%)

diff --git a/dpnp/dpnp_algo/CMakeLists.txt b/dpnp/dpnp_algo/CMakeLists.txt
index 9f2921b53d55..abdf9ae0cb7e 100644
--- a/dpnp/dpnp_algo/CMakeLists.txt
+++ b/dpnp/dpnp_algo/CMakeLists.txt
@@ -1,18 +1,18 @@
 
 set(dpnp_algo_pyx_deps
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_linearalgebra.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_manipulation.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_counting.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_statistics.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_trigonometric.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_sorting.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_arraycreation.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_mathematical.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_searching.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_indexing.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_logic.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_bitwise.pyx
-  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_special.pyx
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_linearalgebra.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_manipulation.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_counting.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_statistics.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_trigonometric.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_sorting.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_arraycreation.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_mathematical.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_searching.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_indexing.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_logic.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_bitwise.pxi
+  ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_special.pxi
   )
 
 build_dpnp_cython_ext_with_backend(
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 2fa9de34b998..9befd6d20d62 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -60,19 +60,19 @@ __all__ = [
 ]
 
 
-include "dpnp_algo_arraycreation.pyx"
-include "dpnp_algo_bitwise.pyx"
-include "dpnp_algo_counting.pyx"
-include "dpnp_algo_indexing.pyx"
-include "dpnp_algo_linearalgebra.pyx"
-include "dpnp_algo_logic.pyx"
-include "dpnp_algo_manipulation.pyx"
-include "dpnp_algo_mathematical.pyx"
-include "dpnp_algo_searching.pyx"
-include "dpnp_algo_sorting.pyx"
-include "dpnp_algo_special.pyx"
-include "dpnp_algo_statistics.pyx"
-include "dpnp_algo_trigonometric.pyx"
+include "dpnp_algo_arraycreation.pxi"
+include "dpnp_algo_bitwise.pxi"
+include "dpnp_algo_counting.pxi"
+include "dpnp_algo_indexing.pxi"
+include "dpnp_algo_linearalgebra.pxi"
+include "dpnp_algo_logic.pxi"
+include "dpnp_algo_manipulation.pxi"
+include "dpnp_algo_mathematical.pxi"
+include "dpnp_algo_searching.pxi"
+include "dpnp_algo_sorting.pxi"
+include "dpnp_algo_special.pxi"
+include "dpnp_algo_statistics.pxi"
+include "dpnp_algo_trigonometric.pxi"
 
 
 ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_astype_t)(c_dpctl.DPCTLSyclQueueRef,
diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
rename to dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx b/dpnp/dpnp_algo/dpnp_algo_bitwise.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
rename to dpnp/dpnp_algo/dpnp_algo_bitwise.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_counting.pyx b/dpnp/dpnp_algo/dpnp_algo_counting.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_counting.pyx
rename to dpnp/dpnp_algo/dpnp_algo_counting.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_indexing.pyx
rename to dpnp/dpnp_algo/dpnp_algo_indexing.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx
rename to dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_logic.pyx
rename to dpnp/dpnp_algo/dpnp_algo_logic.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx b/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_manipulation.pyx
rename to dpnp/dpnp_algo/dpnp_algo_manipulation.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
rename to dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_searching.pyx b/dpnp/dpnp_algo/dpnp_algo_searching.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_searching.pyx
rename to dpnp/dpnp_algo/dpnp_algo_searching.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx b/dpnp/dpnp_algo/dpnp_algo_sorting.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_sorting.pyx
rename to dpnp/dpnp_algo/dpnp_algo_sorting.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_special.pyx b/dpnp/dpnp_algo/dpnp_algo_special.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_special.pyx
rename to dpnp/dpnp_algo/dpnp_algo_special.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_statistics.pyx
rename to dpnp/dpnp_algo/dpnp_algo_statistics.pxi
diff --git a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi
similarity index 100%
rename from dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
rename to dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi

From d41ed51747be6c02d240539ee20f10596e3b4793 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Mon, 3 Apr 2023 11:36:06 +0200
Subject: [PATCH 050/129] Support parameter out in dpnp.add() (#1329)

* Support parameter out in dpnp.add()

* Update tests/test_mathematical.py

Co-authored-by: vlad-perevezentsev <vladislav.perevezentsev@intel.com>

* Update tests/test_mathematical.py

Co-authored-by: vlad-perevezentsev <vladislav.perevezentsev@intel.com>

* Update tests/test_mathematical.py

* Update tests/test_mathematical.py

Co-authored-by: Natalia Polina <natalia.polina@intel.com>

* Use internal _check_nd_call() function which is common for mathematical ones with 2 input arrays

* Add more test for 'out' parameter

---------

Co-authored-by: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Co-authored-by: Natalia Polina <natalia.polina@intel.com>
---
 dpnp/dpnp_array.py              |   5 +-
 dpnp/dpnp_iface_mathematical.py |  26 +-------
 tests/test_mathematical.py      | 103 +++++++++++++++++++++++++++++---
 tests/test_strides.py           |  10 ++--
 tests/test_usm_type.py          |   2 +
 5 files changed, 107 insertions(+), 39 deletions(-)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index f2ccf56ef76b..70ba6f44580c 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -194,7 +194,10 @@ def __gt__(self, other):
         return dpnp.greater(self, other)
 
  # '__hash__',
- # '__iadd__',
+
+    def __iadd__(self, other):
+        dpnp.add(self, other, out=self)
+        return self
 
     def __iand__(self, other):
         dpnp.bitwise_and(self, other, out=self)
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index a001b055a280..4c19801346b4 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -235,7 +235,7 @@ def add(x1,
     -----------
     Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
     or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
-    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
+    Parameters `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
@@ -251,29 +251,7 @@ def add(x1,
 
     """
 
-    if out is not None:
-        pass
-    elif where is not True:
-        pass
-    elif dtype is not None:
-        pass
-    elif subok is not True:
-        pass
-    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
-        # at least either x1 or x2 has to be an array
-        pass
-    else:
-        # get USM type and queue to copy scalar from the host memory into a USM allocation
-        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
-
-        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        if x1_desc and x2_desc:
-            return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
-
-    return call_origin(numpy.add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
+    return _check_nd_call(numpy.add, dpnp_add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def around(x1, decimals=0, out=None):
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 4b3b5d07f941..47ee33cb64d0 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -1,6 +1,7 @@
 import pytest
 from .helper import (
     get_all_dtypes,
+    get_float_complex_dtypes,
     is_cpu_device,
     is_win_platform
 )
@@ -634,24 +635,108 @@ def test_invalid_shape(self, shape):
             dpnp.trunc(dp_array, out=dp_out)
 
 
+class TestAdd:
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
+    def test_add(self, dtype):
+        array1_data = numpy.arange(10)
+        array2_data = numpy.arange(5, 15)
+        out = numpy.empty(10, dtype=dtype)
+
+        # DPNP
+        dp_array1 = dpnp.array(array1_data, dtype=dtype)
+        dp_array2 = dpnp.array(array2_data, dtype=dtype)
+        dp_out = dpnp.array(out, dtype=dtype)
+        result = dpnp.add(dp_array1, dp_array2, out=dp_out)
+
+        # original
+        np_array1 = numpy.array(array1_data, dtype=dtype)
+        np_array2 = numpy.array(array2_data, dtype=dtype)
+        expected = numpy.add(np_array1, np_array2, out=out)
+
+        assert_allclose(expected, result)
+        assert_allclose(out, dp_out)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
+    def test_out_dtypes(self, dtype):
+        size = 2 if dtype == dpnp.bool else 10
+
+        np_array1 = numpy.arange(size, 2 * size, dtype=dtype)
+        np_array2 = numpy.arange(size, dtype=dtype)
+        np_out = numpy.empty(size, dtype=numpy.complex64)
+        expected = numpy.add(np_array1, np_array2, out=np_out)
+
+        dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype)
+        dp_array2 = dpnp.arange(size, dtype=dtype)
+        dp_out = dpnp.empty(size, dtype=dpnp.complex64)
+        result = dpnp.add(dp_array1, dp_array2, out=dp_out)
+
+        assert_array_equal(expected, result)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
+    def test_out_overlap(self, dtype):
+        size = 1 if dtype == dpnp.bool else 15
+
+        np_a = numpy.arange(2 * size, dtype=dtype)
+        expected = numpy.add(np_a[size::], np_a[::2], out=np_a[:size:])
+
+        dp_a = dpnp.arange(2 * size, dtype=dtype)
+        result = dpnp.add(dp_a[size::], dp_a[::2], out=dp_a[:size:])
+
+        assert_allclose(expected, result)
+        assert_allclose(dp_a, np_a)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True))
+    def test_inplace_strided_out(self, dtype):
+        size = 21
+
+        np_a = numpy.arange(size, dtype=dtype)
+        np_a[::3] += 4
+
+        dp_a = dpnp.arange(size, dtype=dtype)
+        dp_a[::3] += 4
+
+        assert_allclose(dp_a, np_a)
+
+    @pytest.mark.parametrize("shape",
+                             [(0,), (15, ), (2, 2)],
+                             ids=['(0,)', '(15, )', '(2,2)'])
+    def test_invalid_shape(self, shape):
+        dp_array1 = dpnp.arange(10, dtype=dpnp.float64)
+        dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64)
+        dp_out = dpnp.empty(shape, dtype=dpnp.float64)
+
+        with pytest.raises(ValueError):
+            dpnp.add(dp_array1, dp_array2, out=dp_out)
+
+    @pytest.mark.parametrize("out",
+                             [4, (), [], (3, 7), [2, 4]],
+                             ids=['4', '()', '[]', '(3, 7)', '[2, 4]'])
+    def test_invalid_out(self, out):
+        a = dpnp.arange(10)
+
+        assert_raises(TypeError, dpnp.add, a, 2, out)
+        assert_raises(TypeError, numpy.add, a.asnumpy(), 2, out)
+
+
 class TestPower:
-    def test_power(self):
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_power(self, dtype):
         array1_data = numpy.arange(10)
         array2_data = numpy.arange(5, 15)
-        out = numpy.empty(10, dtype=numpy.float64)
+        out = numpy.empty(10, dtype=dtype)
 
         # DPNP
-        dp_array1 = dpnp.array(array1_data, dtype=dpnp.float64)
-        dp_array2 = dpnp.array(array2_data, dtype=dpnp.float64)
-        dp_out = dpnp.array(out, dtype=dpnp.float64)
+        dp_array1 = dpnp.array(array1_data, dtype=dtype)
+        dp_array2 = dpnp.array(array2_data, dtype=dtype)
+        dp_out = dpnp.array(out, dtype=dtype)
         result = dpnp.power(dp_array1, dp_array2, out=dp_out)
 
         # original
-        np_array1 = numpy.array(array1_data, dtype=numpy.float64)
-        np_array2 = numpy.array(array2_data, dtype=numpy.float64)
+        np_array1 = numpy.array(array1_data, dtype=dtype)
+        np_array2 = numpy.array(array2_data, dtype=dtype)
         expected = numpy.power(np_array1, np_array2, out=out)
 
-        assert_array_equal(expected, result)
+        assert_allclose(expected, result)
 
     @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_none=True))
     def test_out_dtypes(self, dtype):
@@ -662,7 +747,7 @@ def test_out_dtypes(self, dtype):
         np_out = numpy.empty(size, dtype=numpy.complex64)
         expected = numpy.power(np_array1, np_array2, out=np_out)
 
-        dp_array1 = dpnp.arange(size, 2*size, dtype=dtype)
+        dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype)
         dp_array2 = dpnp.arange(size, dtype=dtype)
         dp_out = dpnp.empty(size, dtype=dpnp.complex64)
         result = dpnp.power(dp_array1, dp_array2, out=dp_out)
diff --git a/tests/test_strides.py b/tests/test_strides.py
index 10bd575bf6a9..8295b03412a3 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -217,7 +217,7 @@ def test_strides_true_devide(dtype, shape):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["power"])
+                         ["add", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_strided_out_2args(func_name, dtype):
     np_out = numpy.ones((5, 3, 2))[::3]
@@ -236,7 +236,7 @@ def test_strided_out_2args(func_name, dtype):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["power"])
+                         ["add", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_strided_in_out_2args(func_name, dtype):
     sh = (3, 4, 2)
@@ -258,7 +258,7 @@ def test_strided_in_out_2args(func_name, dtype):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["power"])
+                         ["add", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_strided_in_out_2args_diff_out_dtype(func_name, dtype):
     sh = (3, 3, 2)
@@ -280,7 +280,7 @@ def test_strided_in_out_2args_diff_out_dtype(func_name, dtype):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["power"])
+                         ["add", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
 def test_strided_in_2args_overlap(func_name, dtype):
     size = 5
@@ -296,7 +296,7 @@ def test_strided_in_2args_overlap(func_name, dtype):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["power"])
+                         ["add", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
 def test_strided_in_out_2args_overlap(func_name, dtype):
     sh = (4, 3, 2)
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 817bdee66a57..2d5a3f4f1a4d 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -18,6 +18,8 @@ def test_coerced_usm_types_sum(usm_type_x, usm_type_y):
     y = dp.arange(1000, usm_type = usm_type_y)
 
     z = 1.3 + x + y + 2
+    z += x
+    z += 7.4
 
     assert x.usm_type == usm_type_x
     assert y.usm_type == usm_type_y

From 20c262efc86fa94d605baa18fde67e43a29f4724 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 3 Apr 2023 04:57:13 -0500
Subject: [PATCH 051/129] PR template: memento perficientur

---
 .github/pull_request_template.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 17140158deee..b5edc3985308 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -2,4 +2,5 @@
 - [ ] Have you added a test, reproducer or referred to issue with a reproducer?
 - [ ] Have you tested your changes locally for CPU and GPU devices?
 - [ ] Have you made sure that new changes do not introduce compiler warnings?
+- [ ] Have you checked performance impact of proposed changes?
 - [ ] If this PR is a work in progress, are you filing the PR as a draft?

From 1f6619a25a0222c528b4c096560556fd092b7b95 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 3 Apr 2023 12:04:34 -0500
Subject: [PATCH 052/129] Support parameter out in dpnp.multiply()

---
 dpnp/dpnp_array.py              |  5 +-
 dpnp/dpnp_iface_mathematical.py | 26 +----------
 tests/test_mathematical.py      | 83 +++++++++++++++++++++++++++++++++
 tests/test_strides.py           | 10 ++--
 tests/test_usm_type.py          |  2 +
 5 files changed, 96 insertions(+), 30 deletions(-)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 70ba6f44580c..b8b962caaa15 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -211,7 +211,10 @@ def __ilshift__(self, other):
 
  # '__imatmul__',
  # '__imod__',
- # '__imul__',
+
+    def __imul__(self, other):
+        dpnp.multiply(self, other, out=self)
+        return self
 
     def __index__(self):
         return self._array_obj.__index__()
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 4c19801346b4..525180e6106c 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -1155,7 +1155,7 @@ def multiply(x1,
     -----------
     Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
     or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
-    Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
+    Parameters `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
     Otherwise the functions will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
@@ -1170,29 +1170,7 @@ def multiply(x1,
 
     """
 
-    if out is not None:
-        pass
-    elif where is not True:
-        pass
-    elif dtype is not None:
-        pass
-    elif subok is not True:
-        pass
-    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
-        # at least either x1 or x2 has to be an array
-        pass
-    else:
-        # get USM type and queue to copy scalar from the host memory into a USM allocation
-        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
-
-        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        if x1_desc and x2_desc:
-            return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
-
-    return call_origin(numpy.multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
+    return _check_nd_call(numpy.multiply, dpnp_multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def nancumprod(x1, **kwargs):
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 47ee33cb64d0..ad16a9c7555f 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -718,6 +718,89 @@ def test_invalid_out(self, out):
         assert_raises(TypeError, numpy.add, a.asnumpy(), 2, out)
 
 
+class TestMultiply:
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
+    def test_multiply(self, dtype):
+        array1_data = numpy.arange(10)
+        array2_data = numpy.arange(5, 15)
+        out = numpy.empty(10, dtype=dtype)
+
+        # DPNP
+        dp_array1 = dpnp.array(array1_data, dtype=dtype)
+        dp_array2 = dpnp.array(array2_data, dtype=dtype)
+        dp_out = dpnp.array(out, dtype=dtype)
+        result = dpnp.multiply(dp_array1, dp_array2, out=dp_out)
+
+        # original
+        np_array1 = numpy.array(array1_data, dtype=dtype)
+        np_array2 = numpy.array(array2_data, dtype=dtype)
+        expected = numpy.multiply(np_array1, np_array2, out=out)
+
+        assert_allclose(expected, result)
+        assert_allclose(out, dp_out)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
+    def test_out_dtypes(self, dtype):
+        size = 2 if dtype == dpnp.bool else 10
+
+        np_array1 = numpy.arange(size, 2 * size, dtype=dtype)
+        np_array2 = numpy.arange(size, dtype=dtype)
+        np_out = numpy.empty(size, dtype=numpy.complex64)
+        expected = numpy.multiply(np_array1, np_array2, out=np_out)
+
+        dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype)
+        dp_array2 = dpnp.arange(size, dtype=dtype)
+        dp_out = dpnp.empty(size, dtype=dpnp.complex64)
+        result = dpnp.multiply(dp_array1, dp_array2, out=dp_out)
+
+        assert_array_equal(expected, result)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
+    def test_out_overlap(self, dtype):
+        size = 1 if dtype == dpnp.bool else 15
+
+        np_a = numpy.arange(2 * size, dtype=dtype)
+        expected = numpy.multiply(np_a[size::], np_a[::2], out=np_a[:size:])
+
+        dp_a = dpnp.arange(2 * size, dtype=dtype)
+        result = dpnp.multiply(dp_a[size::], dp_a[::2], out=dp_a[:size:])
+
+        assert_allclose(expected, result)
+        assert_allclose(dp_a, np_a)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True))
+    def test_inplace_strided_out(self, dtype):
+        size = 21
+
+        np_a = numpy.arange(size, dtype=dtype)
+        np_a[::3] *= 4
+
+        dp_a = dpnp.arange(size, dtype=dtype)
+        dp_a[::3] *= 4
+
+        assert_allclose(dp_a, np_a)
+
+    @pytest.mark.parametrize("shape",
+                             [(0,), (15, ), (2, 2)],
+                             ids=['(0,)', '(15, )', '(2,2)'])
+    def test_invalid_shape(self, shape):
+        dp_array1 = dpnp.arange(10, dtype=dpnp.float64)
+        dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64)
+        dp_out = dpnp.empty(shape, dtype=dpnp.float64)
+
+        with pytest.raises(ValueError):
+            dpnp.multiply(dp_array1, dp_array2, out=dp_out)
+
+    @pytest.mark.parametrize("out",
+                             [4, (), [], (3, 7), [2, 4]],
+                             ids=['4', '()', '[]', '(3, 7)', '[2, 4]'])
+    def test_invalid_out(self, out):
+        a = dpnp.arange(10)
+
+        assert_raises(TypeError, dpnp.multiply, a, 2, out)
+        assert_raises(TypeError, numpy.multiply, a.asnumpy(), 2, out)
+
+
 class TestPower:
     @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
     def test_power(self, dtype):
diff --git a/tests/test_strides.py b/tests/test_strides.py
index 8295b03412a3..abc80dd4071b 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -217,7 +217,7 @@ def test_strides_true_devide(dtype, shape):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["add", "power"])
+                         ["add", "multiply", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_strided_out_2args(func_name, dtype):
     np_out = numpy.ones((5, 3, 2))[::3]
@@ -236,7 +236,7 @@ def test_strided_out_2args(func_name, dtype):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["add", "power"])
+                         ["add", "multiply", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_strided_in_out_2args(func_name, dtype):
     sh = (3, 4, 2)
@@ -258,7 +258,7 @@ def test_strided_in_out_2args(func_name, dtype):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["add", "power"])
+                         ["add", "multiply", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
 def test_strided_in_out_2args_diff_out_dtype(func_name, dtype):
     sh = (3, 3, 2)
@@ -280,7 +280,7 @@ def test_strided_in_out_2args_diff_out_dtype(func_name, dtype):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["add", "power"])
+                         ["add", "multiply", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
 def test_strided_in_2args_overlap(func_name, dtype):
     size = 5
@@ -296,7 +296,7 @@ def test_strided_in_2args_overlap(func_name, dtype):
 
 
 @pytest.mark.parametrize("func_name",
-                         ["add", "power"])
+                         ["add", "multiply", "power"])
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
 def test_strided_in_out_2args_overlap(func_name, dtype):
     sh = (4, 3, 2)
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 2d5a3f4f1a4d..9c48a20fa268 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -33,6 +33,8 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y):
     y = dp.arange(10, usm_type = usm_type_y)
 
     z = 3 * x * y * 1.5
+    z *= x
+    z *= 4.8
 
     assert x.usm_type == usm_type_x
     assert y.usm_type == usm_type_y

From 216809364e99753efdbc1d069f44ae328dd0198a Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Tue, 4 Apr 2023 10:23:39 +0200
Subject: [PATCH 053/129] Implement support of tuple key in __getitem__ and
 __setitem__ (#1362)

---
 dpnp/dpnp_array.py     | 24 +++++++++++++---
 tests/test_indexing.py | 62 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index b8b962caaa15..fcbc17a77a94 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -29,6 +29,23 @@
 
 import dpnp
 
+
+def _get_unwrapped_index_key(key):
+    """
+    Return a key where each nested instance of DPNP array is unwrapped into USM ndarray
+    for futher processing in DPCTL advanced indexing functions.
+
+    """
+
+    if isinstance(key, tuple):
+        if any(isinstance(x, dpnp_array) for x in key):
+            # create a new tuple from the input key with unwrapped DPNP arrays
+            return tuple(x.get_array() if isinstance(x, dpnp_array) else x for x in key)
+    elif isinstance(key, dpnp_array):
+        return key.get_array()
+    return key
+
+
 class dpnp_array:
     """
     Multi-dimensional array object.
@@ -176,8 +193,7 @@ def __ge__(self, other):
  # '__getattribute__',
 
     def __getitem__(self, key):
-        if isinstance(key, dpnp_array):
-            key = key.get_array()
+        key = _get_unwrapped_index_key(key)
 
         item = self._array_obj.__getitem__(key)
         if not isinstance(item, dpt.usm_ndarray):
@@ -340,8 +356,8 @@ def __rxor__(self, other):
  # '__setattr__',
 
     def __setitem__(self, key, val):
-        if isinstance(key, dpnp_array):
-            key = key.get_array()
+        key = _get_unwrapped_index_key(key)
+
         if isinstance(val, dpnp_array):
             val = val.get_array()
 
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 41128fd70e2d..fb49d8c87495 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -6,10 +6,70 @@
 
 import numpy
 from numpy.testing import (
-    assert_array_equal
+    assert_,
+    assert_array_equal,
+    assert_equal
 )
 
 
+class TestIndexing:
+    def test_ellipsis_index(self):
+        a = dpnp.array([[1, 2, 3],
+                        [4, 5, 6],
+                        [7, 8, 9]])
+        assert_(a[...] is not a)
+        assert_equal(a[...], a)
+
+        # test that slicing with ellipsis doesn't skip an arbitrary number of dimensions
+        assert_equal(a[0, ...], a[0])
+        assert_equal(a[0, ...], a[0,:])
+        assert_equal(a[..., 0], a[:, 0])
+
+        # test that slicing with ellipsis always results in an array
+        assert_equal(a[0, ..., 1], dpnp.array(2))
+
+        # assignment with `(Ellipsis,)` on 0-d arrays
+        b = dpnp.array(1)
+        b[(Ellipsis,)] = 2
+        assert_equal(b, 2)
+
+    def test_boolean_indexing_list(self):
+        a = dpnp.array([1, 2, 3])
+        b = dpnp.array([True, False, True])
+
+        assert_equal(a[b], [1, 3])
+        assert_equal(a[None, b], [[1, 3]])
+
+    def test_indexing_array_weird_strides(self):
+        np_x = numpy.ones(10)
+        dp_x = dpnp.ones(10)
+
+        np_ind = numpy.arange(10)[:, None, None, None]
+        np_ind = numpy.broadcast_to(np_ind, (10, 55, 4, 4))
+
+        dp_ind = dpnp.arange(10)[:, None, None, None]
+        dp_ind = dpnp.broadcast_to(dp_ind, (10, 55, 4, 4))
+
+        # single advanced index case
+        assert_array_equal(dp_x[dp_ind], np_x[np_ind])
+
+        np_x2 = numpy.ones((10, 2))
+        dp_x2 = dpnp.ones((10, 2))
+
+        np_zind = numpy.zeros(4, dtype=np_ind.dtype)
+        dp_zind = dpnp.zeros(4, dtype=dp_ind.dtype)
+
+        # higher dimensional advanced index
+        assert_array_equal(dp_x2[dp_ind, dp_zind], np_x2[np_ind, np_zind])
+
+    def test_indexing_array_negative_strides(self):
+        arr = dpnp.zeros((4, 4))[::-1, ::-1]
+
+        slices = (slice(None), dpnp.array([0, 1, 2, 3]))
+        arr[slices] = 10
+        assert_array_equal(arr, 10.)
+
+
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 def test_choose():
     a = numpy.r_[:4]

From 6748b93826ab37fbbbc8afc3cc63a52d8d3021ad Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 3 Apr 2023 04:45:44 -0500
Subject: [PATCH 054/129] Implement 'nbytes' property of dpnp array

---
 dpnp/dpnp_array.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index fcbc17a77a94..8211a8e23953 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -782,6 +782,8 @@ def item(self, id=None):
     @property
     def itemsize(self):
         """
+        Length of one array element in bytes.
+
         """
 
         return self._array_obj.itemsize
@@ -807,11 +809,20 @@ def min(self, axis=None, out=None, keepdims=numpy._NoValue, initial=numpy._NoVal
 
         return dpnp.min(self, axis, out, keepdims, initial, where)
 
- # 'nbytes',
+    @property
+    def nbytes(self):
+        """
+        Total bytes consumed by the elements of the array.
+
+        """
+
+        return self._array_obj.nbytes
 
     @property
     def ndim(self):
         """
+        Number of array dimensions.
+
         """
 
         return self._array_obj.ndim

From 1b4f398b00238fd660db1dacc5fe4fc0a07ad97f Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Mon, 3 Apr 2023 22:42:04 +0200
Subject: [PATCH 055/129] Update dpnp/dpnp_array.py

Co-authored-by: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
---
 dpnp/dpnp_array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 8211a8e23953..5741ea0fa145 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -782,7 +782,7 @@ def item(self, id=None):
     @property
     def itemsize(self):
         """
-        Length of one array element in bytes.
+        Size of one array element in bytes.
 
         """
 

From 8e45a67e6f551fd184096f1e7a01087b2e24f274 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 4 Apr 2023 20:34:45 +0200
Subject: [PATCH 056/129] Enable profiling for .pxi and .pyx files

---
 dpnp/dparray.pyx                           |  1 +
 dpnp/dpnp_algo/dpnp_algo.pyx               |  1 +
 dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi |  1 +
 dpnp/dpnp_algo/dpnp_algo_bitwise.pxi       |  1 +
 dpnp/dpnp_algo/dpnp_algo_counting.pxi      |  3 ++-
 dpnp/dpnp_algo/dpnp_algo_indexing.pxi      |  1 +
 dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi |  1 +
 dpnp/dpnp_algo/dpnp_algo_logic.pxi         |  1 +
 dpnp/dpnp_algo/dpnp_algo_manipulation.pxi  |  3 ++-
 dpnp/dpnp_algo/dpnp_algo_mathematical.pxi  |  3 ++-
 dpnp/dpnp_algo/dpnp_algo_searching.pxi     |  1 +
 dpnp/dpnp_algo/dpnp_algo_sorting.pxi       |  3 ++-
 dpnp/dpnp_algo/dpnp_algo_special.pxi       |  3 ++-
 dpnp/dpnp_algo/dpnp_algo_statistics.pxi    |  1 +
 dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi |  3 ++-
 dpnp/dpnp_utils/dpnp_algo_utils.pyx        |  3 ++-
 dpnp/fft/dpnp_algo_fft.pyx                 |  3 ++-
 dpnp/linalg/dpnp_algo_linalg.pyx           |  3 ++-
 dpnp/random/dpnp_algo_random.pyx           | 11 ++++++-----
 19 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/dpnp/dparray.pyx b/dpnp/dparray.pyx
index dffbf6f65d15..0cf94759a6ce 100644
--- a/dpnp/dparray.pyx
+++ b/dpnp/dparray.pyx
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 9befd6d20d62..3174b5cbb3f0 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi
index 7b538118b939..3525a42ca0d4 100644
--- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
diff --git a/dpnp/dpnp_algo/dpnp_algo_bitwise.pxi b/dpnp/dpnp_algo/dpnp_algo_bitwise.pxi
index a8af53b709d1..6c630f791ad1 100644
--- a/dpnp/dpnp_algo/dpnp_algo_bitwise.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_bitwise.pxi
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
diff --git a/dpnp/dpnp_algo/dpnp_algo_counting.pxi b/dpnp/dpnp_algo/dpnp_algo_counting.pxi
index 119c0d27b692..ef32d3ed3629 100644
--- a/dpnp/dpnp_algo/dpnp_algo_counting.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_counting.pxi
@@ -1,7 +1,8 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi
index 1759d6f782b1..e9dc538393c3 100644
--- a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi
index 91c1da884050..f9eac4ffd35b 100644
--- a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pxi b/dpnp/dpnp_algo/dpnp_algo_logic.pxi
index b6ac36db412b..f84e90b186fc 100644
--- a/dpnp/dpnp_algo/dpnp_algo_logic.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_logic.pxi
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi b/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi
index 0a51b44e7e52..3e27af363c3c 100644
--- a/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi
@@ -1,7 +1,8 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
index 5d937a7008bb..18285b1ec105 100644
--- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
@@ -1,7 +1,8 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/dpnp/dpnp_algo/dpnp_algo_searching.pxi b/dpnp/dpnp_algo/dpnp_algo_searching.pxi
index 44621b5cca04..07c7dec5f657 100644
--- a/dpnp/dpnp_algo/dpnp_algo_searching.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_searching.pxi
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
diff --git a/dpnp/dpnp_algo/dpnp_algo_sorting.pxi b/dpnp/dpnp_algo/dpnp_algo_sorting.pxi
index 9a701dd7c905..4d8b475cce49 100644
--- a/dpnp/dpnp_algo/dpnp_algo_sorting.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_sorting.pxi
@@ -1,7 +1,8 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/dpnp/dpnp_algo/dpnp_algo_special.pxi b/dpnp/dpnp_algo/dpnp_algo_special.pxi
index fb6ff0d74cc6..179742613840 100644
--- a/dpnp/dpnp_algo/dpnp_algo_special.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_special.pxi
@@ -1,7 +1,8 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi
index 5d21dcf8c74b..6131d292bf93 100644
--- a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
diff --git a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi
index bf9c4d5e0ed2..364e2dca7fb9 100644
--- a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi
@@ -1,7 +1,8 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
index a94381788764..e741a353771f 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
@@ -257,7 +258,7 @@ def get_usm_allocations(objects):
 
     if not isinstance(objects, (list, tuple)):
         raise TypeError("Expected a list or a tuple, got {}".format(type(objects)))
-    
+
     if len(objects) == 0:
         return (None, None)
     return (_get_coerced_usm_type(objects), _get_common_allocation_queue(objects))
diff --git a/dpnp/fft/dpnp_algo_fft.pyx b/dpnp/fft/dpnp_algo_fft.pyx
index 393c744d4f36..7f8d1161074f 100644
--- a/dpnp/fft/dpnp_algo_fft.pyx
+++ b/dpnp/fft/dpnp_algo_fft.pyx
@@ -1,7 +1,8 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx
index ddcaf6774999..232a70cafb3e 100644
--- a/dpnp/linalg/dpnp_algo_linalg.pyx
+++ b/dpnp/linalg/dpnp_algo_linalg.pyx
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
@@ -87,7 +88,7 @@ cpdef utils.dpnp_descriptor dpnp_cholesky(utils.dpnp_descriptor input_):
 
     # ceate result array with type given by FPTR data
     cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_.shape,
-                                                                       kernel_data.return_type, 
+                                                                       kernel_data.return_type,
                                                                        None,
                                                                        device=input_obj.sycl_device,
                                                                        usm_type=input_obj.usm_type,
diff --git a/dpnp/random/dpnp_algo_random.pyx b/dpnp/random/dpnp_algo_random.pyx
index 314906cee6d1..504e365405b4 100644
--- a/dpnp/random/dpnp_algo_random.pyx
+++ b/dpnp/random/dpnp_algo_random.pyx
@@ -1,4 +1,5 @@
 # cython: language_level=3
+# cython: linetrace=True
 # -*- coding: utf-8 -*-
 # *****************************************************************************
 # Copyright (c) 2016-2023, Intel Corporation
@@ -143,7 +144,7 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_laplace_c_1out_t)(c_dpctl.DPCT
                                                                     const double,
                                                                     const size_t,
                                                                     const c_dpctl.DPCTLEventVectorRef) except +
-ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_logistic_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, void * , 
+ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_logistic_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, void * ,
                                                                      const double,
                                                                      const double,
                                                                      const size_t,
@@ -514,7 +515,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_binomial(int ntrial, double p, size):
 
     with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
-    
+
     return result
 
 
@@ -831,7 +832,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_logistic(double loc, double scale, size):
     cdef fptr_dpnp_rng_logistic_c_1out_t func = < fptr_dpnp_rng_logistic_c_1out_t > kernel_data.ptr
     # call FPTR function
     cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), loc, scale, result.size, NULL)
-    
+
     with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
@@ -1015,7 +1016,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_negative_binomial(double a, double p, size)
         func = <fptr_dpnp_rng_negative_binomial_c_1out_t > kernel_data.ptr
         # call FPTR function
         event_ref = func(q_ref, result.get_data(), a, p, result.size, NULL)
-        
+
         with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
         c_dpctl.DPCTLEvent_Delete(event_ref)
 
@@ -1204,7 +1205,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_rayleigh(double scale, size):
 
         q = <c_dpctl.SyclQueue> result_sycl_queue
         q_ref = q.get_queue_ref()
-        
+
         func = <fptr_dpnp_rng_rayleigh_c_1out_t > kernel_data.ptr
         # call FPTR function
         event_ref = func(q_ref, result.get_data(), scale, result.size, NULL)

From 88f9f82adbdb9c4d159a0cbb15bf83e6d3b76d32 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 4 Apr 2023 20:36:38 +0200
Subject: [PATCH 057/129] Add gen_coverage script

---
 .coveragerc                    |  15 ----
 codecov.yml                    |   1 -
 dpnp/CMakeLists.txt            |  28 +++++---
 dpnp/cmake/copy_existing.cmake |   3 +
 pyproject.toml                 |  34 +++++++++
 scripts/gen_coverage.py        | 128 +++++++++++++++++++++++++++++++++
 6 files changed, 185 insertions(+), 24 deletions(-)
 delete mode 100644 .coveragerc
 delete mode 100644 codecov.yml
 create mode 100644 dpnp/cmake/copy_existing.cmake
 create mode 100644 pyproject.toml
 create mode 100644 scripts/gen_coverage.py

diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index 6fe6b46f5e15..000000000000
--- a/.coveragerc
+++ /dev/null
@@ -1,15 +0,0 @@
-[run]
-plugins = Cython.Coverage
-include = dpnp/*,examples/*,build_cython/dpnp/*
-branch = True
-source = dpnp,dpnp.dpnp_algo
-
-[report]
-exclude_lines =
-    if self.debug:
-    pragma: no cover
-    raise NotImplementedError
-    if __name__ == .__main__.:
-ignore_errors = True
-omit =
-    tests/*
diff --git a/codecov.yml b/codecov.yml
deleted file mode 100644
index 69cb76019a47..000000000000
--- a/codecov.yml
+++ /dev/null
@@ -1 +0,0 @@
-comment: false
diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
index 6262fb7150e9..89435e4577e9 100644
--- a/dpnp/CMakeLists.txt
+++ b/dpnp/CMakeLists.txt
@@ -15,20 +15,33 @@ function(build_dpnp_cython_ext _trgt _src _dest)
   target_include_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR})
   target_link_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR}/..)
   target_link_libraries(${_trgt} DPCTLSyclInterface)
-  
+
   set(_linker_options "LINKER:${DPNP_LDFLAGS}")
   target_link_options(${_trgt} PRIVATE ${_linker_options})
   python_extension_module(${_trgt})
-  
+
+  if (DPNP_GENERATE_COVERAGE)
+       set(_copy_cxx_trgt "${_trgt}_copy_cxx")
+       add_custom_target(
+           ${_copy_cxx_trgt} ALL
+           COMMAND ${CMAKE_COMMAND}
+         -DSOURCE_FILE=${_generated_src}
+         -DDEST=${CMAKE_CURRENT_SOURCE_DIR}
+         -P ${CMAKE_SOURCE_DIR}/dpnp/cmake/copy_existing.cmake
+     DEPENDS ${_trgt}
+     VERBATIM
+           COMMENT "Copying Cython-generated source to dpnp"
+       )
+  endif()
   install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest})
 endfunction()
 
 function(build_dpnp_cython_ext_with_backend _trgt _src _dest)
-  build_dpnp_cython_ext(${_trgt} ${_src} ${_dest})
-  target_link_libraries(${_trgt} dpnp_backend_library)
-  if (UNIX)
-    set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH "$ORIGIN/..")
-  endif()
+build_dpnp_cython_ext(${_trgt} ${_src} ${_dest})
+target_link_libraries(${_trgt} dpnp_backend_library)
+if (UNIX)
+  set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH "$ORIGIN/..")
+endif()
 endfunction()
 
 
@@ -40,4 +53,3 @@ add_subdirectory(dpnp_utils)
 add_subdirectory(fft)
 add_subdirectory(linalg)
 add_subdirectory(random)
-
diff --git a/dpnp/cmake/copy_existing.cmake b/dpnp/cmake/copy_existing.cmake
new file mode 100644
index 000000000000..242dc292578b
--- /dev/null
+++ b/dpnp/cmake/copy_existing.cmake
@@ -0,0 +1,3 @@
+if (EXISTS ${SOURCE_FILE})
+   configure_file(${SOURCE_FILE} ${DEST} COPYONLY)
+endif()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000000..8f5c236ed288
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,34 @@
+[tool.coverage.run]
+plugins = [
+    "Cython.Coverage"
+]
+branch = true
+source = [
+    "dpnp",
+]
+omit = [
+    "tests/*",
+    "dpnp/_version.py",
+]
+
+[tool.coverage.report]
+omit = [
+    "tests/*",
+    "dpnp/_version.py",
+]
+
+[tool.pytest.ini.options]
+minversion = "6.0"
+norecursedirs= [
+    ".*", "*.egg*", "build", "dist", "conda-recipe",
+]
+addopts = [
+    "--junitxml=junit.xml",
+    "--ignore setup.py",
+    "--ignore run_test.py",
+    "--cov-report term-missing",
+    "--tb native",
+    "--strict",
+    "--durations=20",
+    "-q -ra",
+]
diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py
new file mode 100644
index 000000000000..0f43b84b6acf
--- /dev/null
+++ b/scripts/gen_coverage.py
@@ -0,0 +1,128 @@
+import os
+import subprocess
+import sys
+
+def run(
+    c_compiler=None,
+    cxx_compiler=None,
+    bin_llvm=None,
+):
+    """
+    Build dpnp with coverage enabled, run pytest and export an lcov report.
+
+    All three arguments are required: `c_compiler` and `cxx_compiler` are
+    passed to CMake, `bin_llvm` is the directory holding ``llvm-profdata``
+    and ``llvm-cov``. The raw profile, the merged profile data and the lcov
+    report (``dpnp_pytest.lcov``) are written to the directory above this one.
+
+    Raises ``RuntimeError`` on non-Linux platforms and ``ValueError`` when
+    an argument is missing.
+    """
+    if "linux" not in sys.platform:
+        raise RuntimeError(
+            "This script only supports coverage collection on Linux"
+        )
+    if not (c_compiler and cxx_compiler and bin_llvm):
+        raise ValueError("c_compiler, cxx_compiler and bin_llvm are required")
+
+    setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    dpctl_cmake_dir = subprocess.check_output(
+        [sys.executable, "-m", "dpctl", "--cmakedir"]
+    )
+
+    cmake_args = [
+        sys.executable,
+        "setup.py",
+        "develop",
+        "-G=Ninja",
+        "--",
+        "-DCMAKE_C_COMPILER:PATH=" + c_compiler,
+        "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler,
+        "-DDPCTL_MODULE_PATH=" + dpctl_cmake_dir.decode().rstrip(),
+        "-DCMAKE_VERBOSE_MAKEFILE=ON",
+        "-DDPNP_GENERATE_COVERAGE=ON",
+    ]
+
+    # Always start from a copy of the current environment so `env` is a dict;
+    # an LLVM_TOOLS_HOME that is already set keeps precedence over `bin_llvm`.
+    env = dict(os.environ)
+    env["PATH"] = ":".join((os.environ.get("PATH", ""), bin_llvm))
+    env.setdefault("LLVM_TOOLS_HOME", bin_llvm)
+
+    subprocess.check_call(cmake_args, shell=False, cwd=setup_dir, env=env)
+
+    # set after the build so that only the test run writes a raw profile
+    env["LLVM_PROFILE_FILE"] = "dpnp_pytest.profraw"
+    subprocess.check_call(
+        [
+            "pytest",
+            "-q",
+            "-ra",
+            "--disable-warnings",
+            "--cov-config",
+            "pyproject.toml",
+            "--cov",
+            "dpnp",
+            "--cov-report",
+            "term-missing",
+            "tests/test_absolute.py",
+            # "--pyargs",
+            # "dpnp",
+            # "-vv",
+        ],
+        cwd=setup_dir,
+        shell=False,
+        env=env,
+    )
+
+    # pytest ran in `setup_dir`, so the raw profile and the built libraries
+    # are there; resolve everything against it, not the caller's cwd
+    search_path = os.path.join(setup_dir, "dpnp")
+    objects = []
+    for file in os.listdir(search_path):
+        if file.endswith("_c.so"):
+            objects.extend(["-object", os.path.join(search_path, file)])
+
+    instr_profile_fn = "dpnp_pytest.profdata"
+    # generate instrumentation profile data
+    subprocess.check_call(
+        [
+            os.path.join(bin_llvm, "llvm-profdata"),
+            "merge",
+            "-sparse",
+            env["LLVM_PROFILE_FILE"],
+            "-o",
+            instr_profile_fn,
+        ],
+        cwd=setup_dir,
+    )
+
+    # export lcov
+    with open(os.path.join(setup_dir, "dpnp_pytest.lcov"), "w") as fh:
+        subprocess.check_call(
+            [
+                os.path.join(bin_llvm, "llvm-cov"),
+                "export",
+                "-format=lcov",
+                "-ignore-filename-regex=/tmp/icpx*",
+                "-instr-profile=" + instr_profile_fn,
+            ]
+            + objects,
+            stdout=fh,
+            cwd=setup_dir,
+        )
+
+if __name__ == "__main__":
+    # icx must be on PATH; LLVM tools are taken from <icx dir>/../bin-llvm
+    c_compiler = "icx"
+    cxx_compiler = "icpx"
+    icx_path = subprocess.check_output(["which", "icx"])
+    bin_dir = os.path.dirname(os.path.dirname(icx_path))
+    bin_llvm = os.path.join(bin_dir.decode("utf-8"), "bin-llvm")
+
+    # build, run the tests and export coverage with the settings above
+    run(
+        c_compiler=c_compiler,
+        cxx_compiler=cxx_compiler,
+        bin_llvm=bin_llvm,
+    )

From 2a9a9c76bc6e82ad5a09fa7e371f8c68cf006a34 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Tue, 4 Apr 2023 15:26:28 -0500
Subject: [PATCH 058/129] Adding a script to drive build of dpnp

---
 scripts/build_locally.py | 187 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 187 insertions(+)
 create mode 100644 scripts/build_locally.py

diff --git a/scripts/build_locally.py b/scripts/build_locally.py
new file mode 100644
index 000000000000..5b63409e0e2d
--- /dev/null
+++ b/scripts/build_locally.py
@@ -0,0 +1,187 @@
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2016-2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import os
+import subprocess
+import sys
+import dpctl
+
+
+def run(
+    use_oneapi=True,
+    build_type="Release",
+    c_compiler=None,
+    cxx_compiler=None,
+    compiler_root=None,
+    cmake_executable=None,
+    verbose=False,
+    cmake_opts="",
+):
+    """
+    Run ``setup.py develop`` for dpnp with the given compilers.
+
+    `c_compiler` and `cxx_compiler` are required and are passed to CMake;
+    `cmake_opts` is split on whitespace and appended to the CMake arguments.
+    `compiler_root` is accepted but not used by this function. With
+    `use_oneapi`, ``DPL_ROOT`` from the environment is forwarded to the
+    build as ``DPL_ROOT_HINT``. Raises ``RuntimeError`` on platforms other
+    than Linux, win32 and cygwin, ``ValueError`` when a compiler is missing.
+    """
+    if "linux" not in sys.platform and sys.platform not in ["win32", "cygwin"]:
+        # explicit raise: an assert would be stripped under ``python -O``
+        raise RuntimeError(sys.platform + " not supported")
+    if c_compiler is None or cxx_compiler is None:
+        raise ValueError("c_compiler and cxx_compiler must be provided")
+    build_system = "Ninja"
+
+    setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    cmake_args = [sys.executable, "setup.py", "develop"]
+    if cmake_executable:
+        cmake_args.append("--cmake-executable=" + cmake_executable)
+    dpctl_module_path = os.path.join(dpctl.get_include(), "..", "resources", "cmake")
+    cmake_args += [
+        "--build-type=" + build_type,
+        "--generator=" + build_system,
+        "--",
+        "-DCMAKE_C_COMPILER:PATH=" + c_compiler,
+        "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler,
+        "-DDPCTL_MODULE_PATH:PATH=" + dpctl_module_path,
+    ]
+    if verbose:
+        cmake_args.append("-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON")
+    if cmake_opts:
+        cmake_args += cmake_opts.split()
+
+    # work on a copy so the caller's process environment is left untouched
+    env = dict(os.environ)
+    if use_oneapi and "DPL_ROOT" in env:
+        env["DPL_ROOT_HINT"] = env["DPL_ROOT"]
+    subprocess.check_call(cmake_args, shell=False, cwd=setup_dir, env=env)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Driver to build dpnp for in-place installation"
+    )
+    driver = parser.add_argument_group(title="Build driver arguments")
+    driver.add_argument("--c-compiler", help="Name of C compiler", default=None)
+    driver.add_argument(
+        "--cxx-compiler", help="Name of C++ compiler", default=None
+    )
+    driver.add_argument(
+        "--oneapi",
+        help="Set if using one-API installation",
+        dest="oneapi",
+        action="store_true",
+    )
+    driver.add_argument(
+        "--debug",
+        default="Release",
+        const="Debug",
+        action="store_const",
+        help="Set the compilation mode to debugging",
+    )
+    driver.add_argument(
+        "--compiler-root",
+        type=str,
+        help="Path to compiler home directory",
+        default=None,
+    )
+    driver.add_argument(
+        "--cmake-executable",
+        type=str,
+        help="Path to cmake executable",
+        default=None,
+    )
+    driver.add_argument(
+        "--verbose",
+        help="Build using verbose makefile mode",
+        dest="verbose",
+        action="store_true",
+    )
+    driver.add_argument(
+        "--cmake-opts",
+        help="Options to pass through to cmake",
+        dest="cmake_opts",
+        default="",
+        type=str,
+    )
+    args = parser.parse_args()
+
+    # Compiler resolution:
+    #   * with --oneapi, or when no compiler option is given at all, the
+    #     default names icx/icpx (icx for C++ off Linux) are used as is;
+    #   * otherwise --compiler-root must be an existing path and each
+    #     compiler must exist as given or relative to that path.
+
+    if args.oneapi or (
+        args.c_compiler is None
+        and args.cxx_compiler is None
+        and args.compiler_root is None
+    ):
+        args.c_compiler = "icx"
+        args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx"
+        args.compiler_root = None
+    else:
+        cr = args.compiler_root
+        if isinstance(cr, str) and os.path.exists(cr):
+            if args.c_compiler is None:
+                args.c_compiler = "icx"
+            if args.cxx_compiler is None:
+                args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx"
+        else:
+            raise RuntimeError(
+                "Option 'compiler-root' must be provided when "
+                "using non-default DPC++ layout."
+            )
+        args_to_validate = [
+            "c_compiler",
+            "cxx_compiler",
+        ]
+        for p in args_to_validate:
+            arg = getattr(args, p)
+            assert isinstance(arg, str)
+            if not os.path.exists(arg):
+                arg2 = os.path.join(cr, arg)
+                if os.path.exists(arg2):
+                    arg = arg2
+                    setattr(args, p, arg)
+            if not os.path.exists(arg):
+                opt_name = p.replace("_", "-")
+                raise RuntimeError(f"Option {opt_name} value {arg} must exist.")
+
+    run(
+        use_oneapi=args.oneapi,
+        build_type=args.debug,
+        c_compiler=args.c_compiler,
+        cxx_compiler=args.cxx_compiler,
+        compiler_root=args.compiler_root,
+        cmake_executable=args.cmake_executable,
+        verbose=args.verbose,
+        cmake_opts=args.cmake_opts,
+    )

From 22880e6606e371847208eb7dccb76c5722cc4b9e Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Tue, 4 Apr 2023 15:40:17 -0500
Subject: [PATCH 059/129] Removed uses of 0.build.sh and 1.build.bat

---
 0.build.sh                     | 73 ----------------------------------
 0.env.sh                       | 24 -----------
 1.build.bat                    | 11 -----
 1.env.bat                      |  8 ----
 README.md                      |  3 +-
 doc/0.builddoc.sh              |  1 -
 doc/install.rst                |  2 +-
 scripts/azure-pipelines.yml    |  2 +-
 scripts/install_cmake_lin.sh   |  4 +-
 scripts/install_system_deps.sh |  3 +-
 10 files changed, 7 insertions(+), 124 deletions(-)
 delete mode 100755 0.build.sh
 delete mode 100755 0.env.sh
 delete mode 100644 1.build.bat
 delete mode 100644 1.env.bat

diff --git a/0.build.sh b/0.build.sh
deleted file mode 100755
index b1a2a29ec0ae..000000000000
--- a/0.build.sh
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/bin/bash
-THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]}))
-
-# . ${THEDIR}/0.env.sh
-cd ${THEDIR}
-
-# Assign $TMP env variable to a directory where the script locates.
-# The env variable is used by compiler as a path to temporary folder,
-# where it can store a temporary files generated during compilation and linkage phases.
-# By default the compiler uses /tmp folder, but it is limited by the size and
-# there might be not enough space to temporary keep all generated data.
-export TMP=${THEDIR}
-
-
-export DPNP_DEBUG=1
-
-python setup.py clean
-python setup.py build_clib
-
-# inplace build
-CC=icpx python setup.py build_ext --inplace
-
-# development build. Root privileges needed
-# python setup.py develop
-
-echo
-echo =========example3==============
-DPCTL_INCLUDES=$(python -m dpctl --includes)
-icpx -fsycl -g -O0 -ggdb3 -fPIC dpnp/backend/examples/example3.cpp $DPCTL_INCLUDES -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3
-# LD_DEBUG=libs,bindings,symbols ./example3
-./example3
-
-# gcc --version
-# echo =========LD_LIBRARY_PATH==============
-# echo $LD_LIBRARY_PATH
-
-# echo =========ldd example3==============
-# ldd ./example3
-# echo =========readelf example3==============
-# readelf -d ./example3
-# echo =========ldd dpnp/libdpnp_backend_c.so==============
-# ldd ./dpnp/libdpnp_backend_c.so
-# echo =========readelf dpnp/libdpnp_backend_c.so==============
-# readelf -d ./dpnp/libdpnp_backend_c.so
-
-# echo ========= libstdc++.so ==============
-# ls -l /usr/share/miniconda/envs/dpnp*/lib/libstdc++.so
-# strings /usr/share/miniconda/envs/dpnp*/lib/libstdc++.so | grep GLIBCXX | sort -n
-
-
-echo
-echo =========example1==============
-# LD_DEBUG=libs,bindings,symbols python examples/example1.py
-# LD_DEBUG=libs python examples/example1.py
-python examples/example1.py
-
-# echo ========= find /opt ==============
-# find /opt -name libstdc++.so*
-# echo ========= find anaconda ==============
-# find /usr/share/miniconda -name libstdc++.so*
-# echo ========= dpkg-query -L libstdc++6 ==============
-# dpkg-query -L libstdc++6
-# echo ========= ls -l /lib/x86_64-linux-gnu/libstdc* ==============
-# ls -l /lib/x86_64-linux-gnu/libstdc*
-
-# gcc --version
-# g++ --version
-# dpcpp --version
-
-# echo ========= APT ==============
-# apt list --installed
-# echo ========= conda ==============
-# conda list
diff --git a/0.env.sh b/0.env.sh
deleted file mode 100755
index 6759181ce760..000000000000
--- a/0.env.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]}))
-
-# We can not use common setup script because
-# using Intel Python brakes build and run procedure
-export ONEAPI_ROOT=/opt/intel/oneapi
-
-. ${ONEAPI_ROOT}/compiler/latest/env/vars.sh
-. ${ONEAPI_ROOT}/tbb/latest/env/vars.sh
-
-if false
-then
-    # Temporary use explicit version (arg_verz) due to MKLD-10520
-    arg_verz=latest
-    . ${ONEAPI_ROOT}/mkl/latest/env/vars.sh
-    unset arg_verz
-else
-    . ${ONEAPI_ROOT}/mkl/latest/env/vars.sh
-fi
-
-export DPCPPROOT=${ONEAPI_ROOT}/compiler/latest
-
-export PYTHONPATH=$PYTHONPATH:${THEDIR}
diff --git a/1.build.bat b/1.build.bat
deleted file mode 100644
index 148eab47cae9..000000000000
--- a/1.build.bat
+++ /dev/null
@@ -1,11 +0,0 @@
-
-:: CALL "1.env.bat"
-
-python setup.py clean
-python setup.py build_clib_setuptools
-
-:: inplace build
-python setup.py build_ext --inplace
-
-:: development build. Root privileges needed
-:: python setup.py develop
diff --git a/1.env.bat b/1.env.bat
deleted file mode 100644
index 069ddc8577eb..000000000000
--- a/1.env.bat
+++ /dev/null
@@ -1,8 +0,0 @@
-
-SET "ONEAPI_ROOT=C:\oneapi"
-CALL "%ONEAPI_ROOT%\compiler\latest\env\vars.bat"
-CALL "%ONEAPI_ROOT%\mkl\latest\env\vars.bat"
-CALL "%ONEAPI_ROOT%\tbb\latest\env\vars.bat"
-CALL "%ONEAPI_ROOT%\dpl\latest\env\vars.bat"
-
-SET "DPCPPROOT=%ONEAPI_ROOT%\compiler\latest"
diff --git a/README.md b/README.md
index 360d6b4e730e..b10394c0eec0 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ After these steps, `dpnp` can be built in debug mode as follows:
 ```bash
 git clone https://github.com/IntelPython/dpnp
 cd dpnp
-./0.build.sh
+python scripts/build_locally.py
 ```
 
 ## Install Wheel Package from Pypi
@@ -48,7 +48,6 @@ export OCL_ICD_FILENAMES=libintelocl.so
 
 ## Run test
 ```bash
-. ./0.env.sh
 pytest
 # or
 pytest tests/test_matmul.py -s -v
diff --git a/doc/0.builddoc.sh b/doc/0.builddoc.sh
index e97883cb5c52..5dd034ac6674 100755
--- a/doc/0.builddoc.sh
+++ b/doc/0.builddoc.sh
@@ -4,7 +4,6 @@ BUILDDOCDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]}))
 ROOTDIR=$BUILDDOCDIR/..
 
 cd $ROOTDIR
-. 0.env.sh
 python setup.py develop
 
 cd $BUILDDOCDIR
diff --git a/doc/install.rst b/doc/install.rst
index 7569c14f9ffa..c47c1edad7f3 100644
--- a/doc/install.rst
+++ b/doc/install.rst
@@ -34,7 +34,7 @@ You can install the latest development version of DPNP from a cloned Git reposit
 
   $ git clone --recursive https://github.com/IntelPython/dpnp.git
   $ cd dpnp
-  $ ./0.build.sh
+  $ python scripts/build_locally.py
 
 .. note::
 
diff --git a/scripts/azure-pipelines.yml b/scripts/azure-pipelines.yml
index 3a9921464473..206a6fe4d53e 100644
--- a/scripts/azure-pipelines.yml
+++ b/scripts/azure-pipelines.yml
@@ -40,7 +40,7 @@ jobs:
       echo ========================= CI ENV ==========================================
       . ./scripts/set_ci_env.sh
       echo ========================= build DPNP ======================================
-      ./0.build.sh
+      python scripts/build_locally.py
       echo ========================= run valgrind ====================================
       export PYTHONMALLOC=malloc
       valgrind --show-leak-kinds=definite --log-file=/tmp/valgrind-output            \
diff --git a/scripts/install_cmake_lin.sh b/scripts/install_cmake_lin.sh
index 6a8f7c7b3006..966a22c617be 100755
--- a/scripts/install_cmake_lin.sh
+++ b/scripts/install_cmake_lin.sh
@@ -4,13 +4,13 @@ THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]}))
 
 echo ========================= install cmake ==================================
 curl --output cmake_webimage.tar.gz \
-  --url https://cmake.org/files/v3.19/cmake-3.19.2-Linux-x86_64.tar.gz \
+  --url https://github.com/Kitware/CMake/releases/download/v3.26.2/cmake-3.26.2-linux-x86_64.tar.gz \
   --retry 5 --retry-delay 5
 
 tar -xzf cmake_webimage.tar.gz
 rm -f cmake_webimage.tar.gz
 
-export PATH=`pwd`/cmake-3.19.2-Linux-x86_64/bin:$PATH
+export PATH=`pwd`/cmake-3.26.2-linux-x86_64/bin:$PATH
 
 which cmake
 cmake --version
diff --git a/scripts/install_system_deps.sh b/scripts/install_system_deps.sh
index ce7104ed1a31..14ac03bb5a35 100755
--- a/scripts/install_system_deps.sh
+++ b/scripts/install_system_deps.sh
@@ -58,7 +58,8 @@ update-alternatives --get-selections
 echo ========================= install Intel OneAPI ===========================
 sudo aptitude install -y intel-oneapi-mkl                \
                          intel-oneapi-mkl-devel          \
-                         intel-oneapi-compiler-dpcpp-cpp
+                         intel-oneapi-compiler-dpcpp-cpp \
+			 intel-tbb
 
 #intel-oneapi-python
 

From 0416c41b2bd9ce77949badabde3bf18edcbf2f56 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Tue, 4 Apr 2023 15:43:39 -0500
Subject: [PATCH 060/129] Corrected documentation for option cmake-opts

---
 scripts/build_locally.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_locally.py b/scripts/build_locally.py
index 5b63409e0e2d..6d008851e7e1 100644
--- a/scripts/build_locally.py
+++ b/scripts/build_locally.py
@@ -126,7 +126,7 @@ def run(
     )
     driver.add_argument(
         "--cmake-opts",
-        help="DPCTLSyclInterface uses Google logger",
+        help="Channels through additional cmake options",
         dest="cmake_opts",
         default="",
         type=str,

From 25e0f19388d91abd850e6125ba266a48c19de3c2 Mon Sep 17 00:00:00 2001
From: Natalia Polina <natalia.polina@intel.com>
Date: Wed, 5 Apr 2023 01:23:52 -0500
Subject: [PATCH 061/129] Fix gh-1352 dpnp.sum() for empty array crashes.

---
 dpnp/dpnp_algo/dpnp_algo_mathematical.pyx | 3 +++
 tests/test_mathematical.py                | 9 +++++++++
 2 files changed, 12 insertions(+)

diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
index 5d937a7008bb..b0a74ca587e4 100644
--- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx
@@ -631,6 +631,9 @@ cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor x1,
                                                                        usm_type=x1_obj.usm_type,
                                                                        sycl_queue=x1_obj.sycl_queue)
 
+    if x1.size == 0 and axis is None:
+        return result
+
     result_sycl_queue = result.get_array().sycl_queue
 
     cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index ad16a9c7555f..776d2c60a85d 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -923,3 +923,12 @@ def test_float_to_inf(self):
         dpnp_res = dpnp.array(a) ** dpnp.array(b)
 
         assert_allclose(numpy_res, dpnp_res.asnumpy())
+
+
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_bool=True))
+@pytest.mark.parametrize("axis", [None, 0, 1, 2, 3])
+def test_sum_empty(dtype, axis):
+    a = numpy.empty((1, 2, 0, 4), dtype=dtype)
+    numpy_res = a.sum(axis=axis)
+    dpnp_res = dpnp.array(a).sum(axis=axis)
+    assert_array_equal(numpy_res, dpnp_res.asnumpy())

From 15babb0b18e04b2a2aa7cf7aa6a559d3d2c87107 Mon Sep 17 00:00:00 2001
From: Natalia Polina <natalia.polina@intel.com>
Date: Wed, 5 Apr 2023 02:20:39 -0500
Subject: [PATCH 062/129] Fixed dpnp.sum() function with output argument.

---
 dpnp/dpnp_iface_mathematical.py | 7 +++++++
 tests/test_mathematical.py      | 9 +++++++++
 2 files changed, 16 insertions(+)

diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 525180e6106c..4cc0f5a76390 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -44,6 +44,7 @@
 from dpnp.dpnp_utils import *
 
 import dpnp
+import dpnp.dpnp_container as dpnp_container
 
 import numpy
 import dpctl.tensor as dpt
@@ -1629,10 +1630,16 @@ def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where
         if where is not True:
             pass
         else:
+            if dpnp.isscalar(out):
+                raise TypeError("output must be an array")
             out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
             result_obj = dpnp_sum(x1_desc, axis, dtype, out_desc, keepdims, initial, where).get_pyobj()
             result = dpnp.convert_single_elem_array_to_scalar(result_obj, keepdims)
 
+            if x1_desc.size == 0 and axis is None:
+                result = dpnp_container.zeros_like(result)
+                if out is not None:
+                    out[...] = result
             return result
 
     return call_origin(numpy.sum, x1, axis=axis, dtype=dtype, out=out, keepdims=keepdims, initial=initial, where=where)
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 776d2c60a85d..7b01f79f9c7b 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -932,3 +932,12 @@ def test_sum_empty(dtype, axis):
     numpy_res = a.sum(axis=axis)
     dpnp_res = dpnp.array(a).sum(axis=axis)
     assert_array_equal(numpy_res, dpnp_res.asnumpy())
+
+
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_bool=True))
+def test_sum_empty_out(dtype):
+    a = dpnp.empty((1, 2, 0, 4), dtype=dtype)
+    out = dpnp.ones(())
+    res = a.sum(out=out)
+    assert_array_equal(out.asnumpy(), res.asnumpy())
+    assert_array_equal(out.asnumpy(), numpy.array(0, dtype=dtype))

From d1223225a832988444d29685ee0d4b8f23c1459e Mon Sep 17 00:00:00 2001
From: Natalia Polina <natalia.polina@intel.com>
Date: Wed, 5 Apr 2023 03:27:47 -0500
Subject: [PATCH 063/129] Fixed erroneous call.

---
 dpnp/dpnp_iface_mathematical.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 4cc0f5a76390..98dcc71d31af 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -44,7 +44,6 @@
 from dpnp.dpnp_utils import *
 
 import dpnp
-import dpnp.dpnp_container as dpnp_container
 
 import numpy
 import dpctl.tensor as dpt
@@ -1637,7 +1636,7 @@ def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where
             result = dpnp.convert_single_elem_array_to_scalar(result_obj, keepdims)
 
             if x1_desc.size == 0 and axis is None:
-                result = dpnp_container.zeros_like(result)
+                result = dpnp.zeros_like(result)
                 if out is not None:
                     out[...] = result
             return result

From b6ce69a73fa4bc3dc3cf4b51c4f8f9cc36d6d89a Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 5 Apr 2023 15:27:45 +0200
Subject: [PATCH 064/129] Add generate_coverage.yaml

---
 .github/workflows/generate_coverage.yaml | 65 ++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 .github/workflows/generate_coverage.yaml

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
new file mode 100644
index 000000000000..d1eca72c55f2
--- /dev/null
+++ b/.github/workflows/generate_coverage.yaml
@@ -0,0 +1,65 @@
+name: Generate coverage data for dpnp
+on:
+  pull_request:
+  push:
+    branches: [add_gen_coverage]
+
+jobs:
+  generate-coverage:
+    name: Generate coverage and push to Coveralls.io
+    runs-on: ubuntu-20.04
+
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    env:
+      python-ver: '3.10'
+
+    steps:
+      - name: Cancel Previous Runs
+        uses: styfle/cancel-workflow-action@0.11.0
+        with:
+          access_token: ${{ github.token }}
+
+      - name: Checkout repo
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Setup miniconda
+        uses: conda-incubator/setup-miniconda@v2.2.0
+        with:
+          auto-update-conda: true
+          python-version: ${{ env.python-ver }}
+          miniconda-version: 'latest'
+          activate-environment: 'coverage'
+          channels: intel, conda-forge
+
+      - name: Install Lcov
+        run: |
+          sudo apt-get install lcov
+      - name: Install dpnp dependencies
+        run: |
+          conda install cython llvm cmake scikit-build ninja pytest pytest-cov \
+              dpctl dpcpp_linux-64  mkl-devel-dpcpp  tbb-devel  onedpl-devel
+      - name: Conda info
+        run: |
+          conda info
+          conda list
+      - name: Build dpnp with coverage
+        run: |
+          python scripts/gen_coverage.py
+      - name: Install coverall dependencies
+        run: |
+          sudo gem install coveralls-lcov
+          conda install coveralls
+      - name: Upload coverage data to coveralls.io
+        run: |
+          echo "Processing pytest-coverage"
+          export DPNP_PYTEST_LCOV=$(find . -name dpnp_pytest.lcov)
+          grep "/tmp" $DPNP_PYTEST_LCOV
+          coveralls-lcov -v -n \
+              $DPNP_PYTEST_LCOV > pytest-dpnp-c-api-coverage.json
+          # merge file with coverage data and upload
+          ls -lh pytest-dpnp-c-api-coverage.json $(find . -name dpnp_pytest.lcov)

From 9bf1cc4cf544b1084ada3f8daa433188c99074e7 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 5 Apr 2023 17:41:52 +0200
Subject: [PATCH 065/129] set env

---
 .github/workflows/generate_coverage.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index d1eca72c55f2..5e2dcf7f33e0 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -42,7 +42,7 @@ jobs:
       - name: Install dpnp dependencies
         run: |
           conda install cython llvm cmake scikit-build ninja pytest pytest-cov \
-              dpctl dpcpp_linux-64  mkl-devel-dpcpp  tbb-devel  onedpl-devel
+              dppy/label/dev::dpctl dpcpp_linux-64  mkl-devel-dpcpp  tbb-devel  onedpl-devel
       - name: Conda info
         run: |
           conda info
@@ -50,6 +50,9 @@ jobs:
       - name: Build dpnp with coverage
         run: |
           python scripts/gen_coverage.py
+        env:
+          name: OCL_ICD_FILENAMES
+          value: libintelocl.so
       - name: Install coverall dependencies
         run: |
           sudo gem install coveralls-lcov

From 15f28555bb2dc170503c5a61aac5e48932215607 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 5 Apr 2023 18:39:44 +0200
Subject: [PATCH 066/129] Set OCL_ICD_FILENAMES env

---
 .github/workflows/generate_coverage.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 5e2dcf7f33e0..7247af61437e 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -51,8 +51,7 @@ jobs:
         run: |
           python scripts/gen_coverage.py
         env:
-          name: OCL_ICD_FILENAMES
-          value: libintelocl.so
+          OCL_ICD_FILENAMES: 'libintelocl.so'
       - name: Install coverall dependencies
         run: |
           sudo gem install coveralls-lcov

From c389b9d301dec24b18101e60f7f9142631e6e4dc Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 5 Apr 2023 18:43:43 +0200
Subject: [PATCH 067/129] Support `out` parameter in dpnp.sqrt() (#1332)

* Add support out parameter in dpnp.sqrt()
---
 dpnp/dpnp_algo/dpnp_algo.pxd               |  2 +-
 dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx |  4 +-
 dpnp/dpnp_iface_trigonometric.py           | 26 +++++++++--
 tests/test_sycl_queue.py                   | 36 +++++++++++++--
 tests/test_umath.py                        | 53 +++++++++++++++++++++-
 tests/test_usm_type.py                     | 17 +++++++
 6 files changed, 126 insertions(+), 12 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index c2bb15102cf0..09af5667f8c4 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -603,7 +603,7 @@ cpdef dpnp_descriptor dpnp_radians(dpnp_descriptor array1)
 cpdef dpnp_descriptor dpnp_recip(dpnp_descriptor array1)
 cpdef dpnp_descriptor dpnp_sin(dpnp_descriptor array1, dpnp_descriptor out)
 cpdef dpnp_descriptor dpnp_sinh(dpnp_descriptor array1)
-cpdef dpnp_descriptor dpnp_sqrt(dpnp_descriptor array1)
+cpdef dpnp_descriptor dpnp_sqrt(dpnp_descriptor array1, dpnp_descriptor out)
 cpdef dpnp_descriptor dpnp_square(dpnp_descriptor array1)
 cpdef dpnp_descriptor dpnp_tan(dpnp_descriptor array1, dpnp_descriptor out)
 cpdef dpnp_descriptor dpnp_tanh(dpnp_descriptor array1)
diff --git a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
index bf9c4d5e0ed2..81c6f3cfc0d6 100644
--- a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx
@@ -148,8 +148,8 @@ cpdef utils.dpnp_descriptor dpnp_sinh(utils.dpnp_descriptor x1):
     return call_fptr_1in_1out_strides(DPNP_FN_SINH_EXT, x1)
 
 
-cpdef utils.dpnp_descriptor dpnp_sqrt(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_SQRT_EXT, x1)
+cpdef utils.dpnp_descriptor dpnp_sqrt(utils.dpnp_descriptor x1, utils.dpnp_descriptor out):
+    return call_fptr_1in_1out_strides(DPNP_FN_SQRT_EXT, x1, dtype=None, out=out, where=True, func_name='sqrt')
 
 
 cpdef utils.dpnp_descriptor dpnp_square(utils.dpnp_descriptor x1):
diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py
index 098dd19648f3..473401071643 100644
--- a/dpnp/dpnp_iface_trigonometric.py
+++ b/dpnp/dpnp_iface_trigonometric.py
@@ -41,6 +41,7 @@
 
 
 import numpy
+import dpctl.tensor as dpt
 
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
@@ -906,7 +907,7 @@ def sinh(x1):
     return call_origin(numpy.sinh, x1, **kwargs)
 
 
-def sqrt(x1):
+def sqrt(x1, /, out = None, **kwargs):
     """
     Return the positive square-root of an array, element-wise.
 
@@ -914,8 +915,11 @@ def sqrt(x1):
 
     Limitations
     -----------
-    Input array is supported as :obj:`dpnp.ndarray`.
+    Input array is supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
+    Parameter `out` is supported as class:`dpnp.ndarray`, class:`dpctl.tensor.usm_ndarray` or
+    with default value ``None``.
     Otherwise the function will be executed sequentially on CPU.
+    Keyword arguments ``kwargs`` are currently unsupported.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     Examples
@@ -928,11 +932,23 @@ def sqrt(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
+    x1_desc = (
+        dpnp.get_dpnp_descriptor(
+            x1, copy_when_strides=False, copy_when_nondefault_queue=False
+        )
+        if not kwargs
+        else None
+    )
     if x1_desc:
-        return dpnp_sqrt(x1_desc).get_pyobj()
+        if out is not None:
+            if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                raise TypeError("return array must be of supported array type")
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
+        else:
+            out_desc = None
+        return dpnp_sqrt(x1_desc, out=out_desc).get_pyobj()
 
-    return call_origin(numpy.sqrt, x1)
+    return call_origin(numpy.sqrt, x1, out=out, **kwargs)
 
 
 def square(x1):
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index ab974e426f93..fcea0d82eb86 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -9,6 +9,7 @@
 import numpy
 
 from numpy.testing import (
+    assert_allclose,
     assert_array_equal,
     assert_raises
 )
@@ -218,7 +219,7 @@ def test_array_creation_cross_device(func, args, kwargs, device_x, device_y):
 
     dpnp_kwargs = dict(kwargs)
     dpnp_kwargs['device'] = device_y
-    
+
     y = getattr(dpnp, func)(*dpnp_args, **dpnp_kwargs)
     numpy.testing.assert_allclose(y_orig, y)
 
@@ -279,6 +280,8 @@ def test_meshgrid(device_x, device_y):
                      [1., 2.]),
         pytest.param("sign",
                      [-5., 4.5]),
+        pytest.param("sqrt",
+                     [1., 3., 9.]),
         pytest.param("sum",
                      [1., 2.]),
         pytest.param("trapz",
@@ -297,7 +300,7 @@ def test_1in_1out(func, data, device):
     x = dpnp.array(data, device=device)
     result = getattr(dpnp, func)(x)
 
-    assert_array_equal(result, expected)
+    assert_allclose(result, expected)
 
     expected_queue = x.get_array().sycl_queue
     result_queue = result.get_array().sycl_queue
@@ -529,6 +532,33 @@ def test_random_state(func, args, kwargs, device, usm_type):
     assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
+@pytest.mark.parametrize(
+    "func,data",
+    [
+        pytest.param("sqrt",
+                     [0., 1., 2., 3., 4., 5., 6., 7., 8.]),
+    ],
+)
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_out_1in_1out(func, data, device):
+    x_orig = numpy.array(data)
+    np_out = getattr(numpy, func)(x_orig)
+    expected = numpy.empty_like(np_out)
+    getattr(numpy, func)(x_orig, out=expected)
+
+    x = dpnp.array(data, device=device)
+    dp_out = getattr(dpnp, func)(x)
+    result = dpnp.empty_like(dp_out)
+    getattr(dpnp, func)(x, out=result)
+
+    assert_allclose(result, expected)
+
+    assert_sycl_queue_equal(result.sycl_queue, x.sycl_queue)
+
+
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize(
     "func,data1,data2",
@@ -574,7 +604,7 @@ def test_random_state(func, args, kwargs, device, usm_type):
 @pytest.mark.parametrize("device",
                          valid_devices,
                          ids=[device.filter_string for device in valid_devices])
-def test_out(func, data1, data2, device):
+def test_out_2in_1out(func, data1, data2, device):
     x1_orig = numpy.array(data1)
     x2_orig = numpy.array(data2)
     np_out = getattr(numpy, func)(x1_orig, x2_orig)
diff --git a/tests/test_umath.py b/tests/test_umath.py
index 3a1f4467dcea..7b5c4b762d88 100644
--- a/tests/test_umath.py
+++ b/tests/test_umath.py
@@ -1,6 +1,7 @@
 import pytest
 from .helper import (
-    get_all_dtypes
+    get_all_dtypes,
+    get_float_dtypes
 )
 
 import numpy
@@ -402,3 +403,53 @@ def test_invalid_shape(self, shape):
 
         with pytest.raises(ValueError):
             dpnp.arctan2(dp_array, dp_array, out=dp_out)
+
+
+class TestSqrt:
+    @pytest.mark.parametrize("dtype", get_float_dtypes())
+    def test_sqrt_ordinary(self, dtype):
+        array_data = numpy.arange(10)
+        out = numpy.empty(10, dtype=dtype)
+
+        # DPNP
+        dp_array = dpnp.array(array_data, dtype=dtype)
+        dp_out = dpnp.array(out, dtype=dtype)
+        result = dpnp.sqrt(dp_array, out=dp_out)
+
+        # original
+        np_array = numpy.array(array_data, dtype=dtype)
+        expected = numpy.sqrt(np_array, out=out)
+
+        numpy.testing.assert_allclose(expected, result)
+        numpy.testing.assert_allclose(out, dp_out)
+
+    @pytest.mark.parametrize("dtype",
+                             [numpy.int64, numpy.int32],
+                             ids=['numpy.int64', 'numpy.int32'])
+    def test_invalid_dtype(self, dtype):
+
+        dp_array = dpnp.arange(10, dtype=dpnp.float32)
+        dp_out = dpnp.empty(10, dtype=dtype)
+
+        with pytest.raises(ValueError):
+            dpnp.sqrt(dp_array, out=dp_out)
+
+    @pytest.mark.parametrize("shape",
+                             [(0,), (15, ), (2, 2)],
+                             ids=['(0,)', '(15, )', '(2,2)'])
+    def test_invalid_shape(self, shape):
+
+        dp_array = dpnp.arange(10, dtype=dpnp.float32)
+        dp_out = dpnp.empty(shape, dtype=dpnp.float32)
+
+        with pytest.raises(ValueError):
+            dpnp.sqrt(dp_array, out=dp_out)
+
+    @pytest.mark.parametrize("out",
+                             [4, (), [], (3, 7), [2, 4]],
+                             ids=['4', '()', '[]', '(3, 7)', '[2, 4]'])
+    def test_invalid_out(self, out):
+        a = dpnp.arange(10)
+
+        numpy.testing.assert_raises(TypeError, dpnp.sqrt, a, out)
+        numpy.testing.assert_raises(TypeError, numpy.sqrt, a.asnumpy(), out)
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 9c48a20fa268..df8575197b38 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -178,6 +178,23 @@ def test_meshgrid(usm_type_x, usm_type_y):
     assert z[1].usm_type == usm_type_y
 
 
+@pytest.mark.parametrize(
+    "func,data",
+    [
+        pytest.param(
+            "sqrt",
+            [1.0, 3.0, 9.0],
+        ),
+    ],
+)
+@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
+def test_1in_1out(func, data, usm_type):
+    x = dp.array(data, usm_type=usm_type)
+    res = getattr(dp, func)(x)
+    assert x.usm_type == usm_type
+    assert res.usm_type == usm_type
+
+
 @pytest.mark.parametrize(
     "func,data1,data2",
     [

From bc628d0adf7656a4165f50f1f1b138541498a7f4 Mon Sep 17 00:00:00 2001
From: Natalia Polina <natalia.polina@intel.com>
Date: Wed, 5 Apr 2023 11:49:53 -0500
Subject: [PATCH 068/129] Added tests for function sum() with different data
 types of output and input argument.

---
 tests/test_mathematical.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 7b01f79f9c7b..5f0d73b23b7b 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -941,3 +941,17 @@ def test_sum_empty_out(dtype):
     res = a.sum(out=out)
     assert_array_equal(out.asnumpy(), res.asnumpy())
     assert_array_equal(out.asnumpy(), numpy.array(0, dtype=dtype))
+
+
+@pytest.mark.parametrize("shape", [(), (1, 2, 3), (1, 0, 2), (10), (3, 3, 3), (5, 5), (0, 6)])
+@pytest.mark.parametrize("dtype_in", get_all_dtypes(no_complex=True, no_bool=True))
+@pytest.mark.parametrize("dtype_out", get_all_dtypes(no_complex=True, no_bool=True))
+def test_sum(shape, dtype_in, dtype_out):
+    a_np = numpy.ones(shape, dtype=dtype_in)
+    a = dpnp.ones(shape, dtype=dtype_in)
+    axes = [None, 0, 1, 2]
+    for axis in axes:
+        if axis is None or axis < a.ndim:
+            numpy_res = a_np.sum(axis=axis, dtype=dtype_out)
+            dpnp_res = a.sum(axis=axis, dtype=dtype_out)
+            assert_array_equal(numpy_res, dpnp_res.asnumpy())

From 1aec74d0355cb161550650befd23b21063f5433e Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 5 Apr 2023 14:18:45 -0500
Subject: [PATCH 069/129] Add dummy use of SKBUILD cmake variable to avoid
 warning

---
 CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 088fd6ee1ca1..cdecc3cefd72 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -138,4 +138,8 @@ if (DPNP_GENERATE_COVERAGE)
     )
 endif()
 
+if(DEFINED SKBUILD)
+  set(_ignore_me ${SKBUILD})
+endif()
+
 add_subdirectory(dpnp)

From bc60d08d417508405ebeb252c5d044c04020ffe1 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 5 Apr 2023 19:43:50 +0200
Subject: [PATCH 070/129] add coveralls support

---
 .github/workflows/generate_coverage.yaml | 30 +++++++++++++++++++-----
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 7247af61437e..b112bee0c495 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -47,15 +47,15 @@ jobs:
         run: |
           conda info
           conda list
-      - name: Build dpnp with coverage
-        run: |
-          python scripts/gen_coverage.py
-        env:
-          OCL_ICD_FILENAMES: 'libintelocl.so'
+      # - name: Build dpnp with coverage
+      #   run: |
+      #     python scripts/gen_coverage.py
+      #   env:
+      #     OCL_ICD_FILENAMES: 'libintelocl.so'
       - name: Install coverall dependencies
         run: |
           sudo gem install coveralls-lcov
-          conda install coveralls
+          pip install coveralls
       - name: Upload coverage data to coveralls.io
         run: |
           echo "Processing pytest-coverage"
@@ -65,3 +65,21 @@ jobs:
               $DPNP_PYTEST_LCOV > pytest-dpnp-c-api-coverage.json
           # merge file with coverage data and upload
           ls -lh pytest-dpnp-c-api-coverage.json $(find . -name dpnp_pytest.lcov)
+          echo "Merging files with coverage data"
+          coveralls --service=github --merge=pytest-dpnp-c-api-coverage.json
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COVERALLS_PARALLEL: true
+
+  coveralls:
+    name: Indicate completion to coveralls.io
+    needs: generate-coverage
+    runs-on: ubuntu-latest
+    container: python:3-slim
+    steps:
+    - name: Finished
+      run: |
+        pip3 install --upgrade coveralls
+        coveralls --finish
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From 58135beb612f856f3a0d7b6716628d45b43a55be Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 5 Apr 2023 22:30:54 +0200
Subject: [PATCH 071/129] Update generate_coverage.yaml

---
 .github/workflows/generate_coverage.yaml | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index b112bee0c495..3a1e17a8aad3 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -41,30 +41,27 @@ jobs:
           sudo apt-get install lcov
       - name: Install dpnp dependencies
         run: |
-          conda install cython llvm cmake scikit-build ninja pytest pytest-cov \
+          conda install cython llvm cmake scikit-build ninja pytest pytest-cov coverage[toml] \
               dppy/label/dev::dpctl dpcpp_linux-64  mkl-devel-dpcpp  tbb-devel  onedpl-devel
       - name: Conda info
         run: |
           conda info
           conda list
-      # - name: Build dpnp with coverage
-      #   run: |
-      #     python scripts/gen_coverage.py
-      #   env:
-      #     OCL_ICD_FILENAMES: 'libintelocl.so'
+      - name: Build dpnp with coverage
+        run: |
+          python scripts/gen_coverage.py
+        env:
+          OCL_ICD_FILENAMES: 'libintelocl.so'
       - name: Install coverall dependencies
         run: |
           sudo gem install coveralls-lcov
-          pip install coveralls
+          conda install coveralls
       - name: Upload coverage data to coveralls.io
         run: |
           echo "Processing pytest-coverage"
           export DPNP_PYTEST_LCOV=$(find . -name dpnp_pytest.lcov)
-          grep "/tmp" $DPNP_PYTEST_LCOV
-          coveralls-lcov -v -n \
-              $DPNP_PYTEST_LCOV > pytest-dpnp-c-api-coverage.json
+          coveralls-lcov -v -n $DPNP_PYTEST_LCOV > pytest-dpnp-c-api-coverage.json
           # merge file with coverage data and upload
-          ls -lh pytest-dpnp-c-api-coverage.json $(find . -name dpnp_pytest.lcov)
           echo "Merging files with coverage data"
           coveralls --service=github --merge=pytest-dpnp-c-api-coverage.json
         env:

From b9dd88e58ef49fb6575e3d25eb8a3bba0247baa3 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 5 Apr 2023 22:37:28 +0200
Subject: [PATCH 072/129] Enable running all tests for coverage

---
 scripts/gen_coverage.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py
index 0f43b84b6acf..f191878ab7a8 100644
--- a/scripts/gen_coverage.py
+++ b/scripts/gen_coverage.py
@@ -62,10 +62,9 @@ def run(
             "dpnp",
             "--cov-report",
             "term-missing",
-            "tests/test_absolute.py",
-            # "--pyargs",
-            # "dpnp",
-            # "-vv",
+            "--pyargs",
+            "tests",
+            "-vv",
         ],
         cwd=setup_dir,
         shell=False,

From 63b25711d9f7a39c3c68aaeb40e8ad5d7ea8c2e6 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 6 Apr 2023 00:25:40 +0200
Subject: [PATCH 073/129] ignore test_random.py

---
 scripts/gen_coverage.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py
index f191878ab7a8..91505e391624 100644
--- a/scripts/gen_coverage.py
+++ b/scripts/gen_coverage.py
@@ -64,6 +64,8 @@ def run(
             "term-missing",
             "--pyargs",
             "tests",
+            "--ignore",
+            "tests/test_random.py",
             "-vv",
         ],
         cwd=setup_dir,

From e71d9dc4220bb757638638268e117bfd57507bf4 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Wed, 5 Apr 2023 20:00:54 -0500
Subject: [PATCH 074/129] Fix OCL CPU RT activation in test_windows step

---
 .github/workflows/conda-package.yml | 50 ++++-------------------------
 1 file changed, 6 insertions(+), 44 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 52ac769b7fe8..17558a4a6bae 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -319,52 +319,14 @@ jobs:
       - name: List installed packages
         run: conda list
 
-      - name: Add library
+      - name: Activate OCL CPU RT
         shell: pwsh
         run: |
-          # Make sure the below libraries exist
-          Get-Item -Path "$env:CONDA_LIB_BIN_PATH\OpenCL.dll"
-          Get-Item -Path "$env:CONDA_LIB_PATH\intelocl64.dll"
-
-          echo "OCL_ICD_FILENAMES = $env:CONDA_LIB_PATH\intelocl64.dll" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-          try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()}
-
-          if ($list.count -eq 0) {
-              if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos)) {
-                 New-Item -Path HKLM:\SOFTWARE\Khronos
-              }
-
-              if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL)) {
-                 New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL
-              }
-
-              if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors)) {
-                 New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors
-              }
-
-              New-ItemProperty -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors -Name "$env:CONDA_LIB_PATH\intelocl64.dll" -Value 0
-              try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()}
-              Write-Output $(Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors)
-
-              # Now copy OpenCL.dll into system folder
-              $system_ocl_icd_loader="C:\Windows\System32\OpenCL.dll"
-              $python_ocl_icd_loader="$env:CONDA_LIB_BIN_PATH\OpenCL.dll"
-              Copy-Item -Path $python_ocl_icd_loader -Destination $system_ocl_icd_loader
-
-              if (Test-Path -Path $system_ocl_icd_loader) {
-                 Write-Output "$system_ocl_icd_loader has been copied"
-                 $acl = Get-Acl $system_ocl_icd_loader
-                 Write-Output $acl
-              } else {
-                 Write-Output "OCL-ICD-Loader was not copied"
-              }
-
-              # Configuration variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default
-              $cl_cfg="$env:CONDA_LIB_PATH\cl.cfg"
-              Write-Output "`n>>> Dump content of $cl_cfg`n" (Get-Content $cl_cfg) "`n<<< end of dump`n"
-              (Get-Content $cl_cfg) -replace '^CL_CONFIG_TBB_DLL_PATH =.*', "CL_CONFIG_TBB_DLL_PATH = $env:CONDA_LIB_BIN_PATH" | Set-Content $cl_cfg
-              Write-Output "`n>>> Dump content of modified $cl_cfg`n" (Get-Content $cl_cfg) "`n<<< end of dump`n"
-          }
+          $script_path="$env:CONDA_PREFIX\Scripts\set-intel-ocl-icd-registry.ps1"
+          &$script_path
+          # Check the variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default
+          $cl_cfg="$env:CONDA_PREFIX\Library\lib\cl.cfg"
+          Get-Content -Tail 5 -Path $cl_cfg
 
       - name: Smoke test
         run: python -c "import dpnp, dpctl; dpctl.lsplatform()"

From 95232dcd605e86690e0a2a5a92233b5c1af0d3f6 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Thu, 6 Apr 2023 03:01:40 -0500
Subject: [PATCH 075/129] No need to set OCL_ICD_FILENAMES on Linux either with
 intel-opencl-rt=2023.1.0

---
 .github/workflows/conda-package.yml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 17558a4a6bae..02e68ac8e82b 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -189,15 +189,11 @@ jobs:
 
       - name: Smoke test
         run: python -c "import dpnp, dpctl; dpctl.lsplatform()"
-        env:
-          OCL_ICD_FILENAMES: 'libintelocl.so'
 
       # TODO: run the whole scope once the issues on CPU are resolved
       - name: Run tests
         run: |
           python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }}
-        env:
-          OCL_ICD_FILENAMES: 'libintelocl.so'
         working-directory: ${{ env.tests-path }}
 
   test_windows:

From ee174546ef9492a348204028ea931e40e0d90071 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 6 Apr 2023 13:14:00 +0200
Subject: [PATCH 076/129] Skip failing tests on CPU

---
 tests/skipped_tests.tbl | 114 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 102 insertions(+), 12 deletions(-)

diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 2e7a5a6d6f3e..22bbce3616aa 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -186,6 +186,12 @@ tests/test_linalg.py::test_svd[(3,4)-float64]
 tests/test_linalg.py::test_svd[(5,3)-float64]
 tests/test_linalg.py::test_svd[(16,16)-float64]
 
+tests/test_logic.py::test_allclose[int32]
+tests/test_logic.py::test_allclose[int64]
+tests/test_logic.py::test_allclose[float32]
+tests/test_logic.py::test_allclose[float64]
+tests/test_logic.py::test_allclose[None]
+
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([[i, i] for i in x])]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp.asarray([(i, i) for i in x], [("a", int), ("b", int)]).view(dpnp.recarray))]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]]
@@ -210,6 +216,11 @@ tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::tes
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_real_setter_zero_dim
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_real_zero_dim
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestScalarConversion::test_scalar_conversion
+tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_0_{shape=()}::test_item
+tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_1_{shape=(1,)}::test_item
+tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_2_{shape=(2, 3)}::test_item
+tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_3_{order='C', shape=(2, 3)}::test_item
+tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_4_{order='F', shape=(2, 3)}::test_item
 tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype
 tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_type
 tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_strides
@@ -233,11 +244,16 @@ tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAn
 tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_view_non_contiguous_raise
 tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestNumPyArrayCopyView_param_0_{src_order='C'}::test_isinstance_numpy_view_copy_f
 tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestNumPyArrayCopyView_param_1_{src_order='F'}::test_isinstance_numpy_view_copy_f
+tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_0_{decimals=-3}::test_round_halfway_float
+tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_1_{decimals=-2}::test_round_halfway_float
+tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_2_{decimals=-1}::test_round_halfway_float
+tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_3_{decimals=0}::test_round_halfway_float
 tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_0_{decimals=-3}::test_round_halfway_uint
 tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_1_{decimals=-2}::test_round_halfway_uint
 tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_2_{decimals=-1}::test_round_halfway_uint
 tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_3_{decimals=0}::test_round_halfway_uint
 
+
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_min_nan
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_all
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_all_keepdims
@@ -385,6 +401,11 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asco
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype
 
+tests/third_party/cupy/creation_tests/test_matrix.py::TestMatrix::test_diag_construction_from_list
+tests/third_party/cupy/creation_tests/test_matrix.py::TestMatrix::test_diag_construction_from_tuple
+tests/third_party/cupy/creation_tests/test_matrix.py::TestMatrix::test_diag_extraction_from_nested_list
+tests/third_party/cupy/creation_tests/test_matrix.py::TestMatrix::test_diag_extraction_from_nested_tuple
+
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid2
@@ -592,7 +613,9 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_8_{opt
 tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt='optimal', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum
 tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float
 tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int
+tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_dim_mismatch3
 tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1
+tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_too_many_dims3
 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge
 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large
 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two
@@ -601,7 +624,14 @@ tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out_f_contiguous
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot
+tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot
+tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes
+tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes
+tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot
+tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_array_scalar
+tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_finite
 tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_infinite
+tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_infinite_equal_nan
 tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_min_int
 tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_broadcast_not_allowed
 tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_is_equal
@@ -745,6 +775,7 @@ tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_ldexp
 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_nextafter_combination
 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_nextafter_float
 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_signbit
+tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_absolute_negative
 tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_clip1
 tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_clip2
 tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_clip3
@@ -762,6 +793,7 @@ tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_inf_na
 tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_nan
 tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_negative
 tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_negative_for_old_numpy
+tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_sign_negative
 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundBorder_param_0_{value=(14, -1)}::test_around_negative2
 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundBorder_param_0_{value=(14, -1)}::test_around_positive2
 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundBorder_param_1_{value=(15, -1)}::test_around_negative2
@@ -823,6 +855,7 @@ tests/third_party/cupy/math_tests/test_sumprod.py::TestDiff::test_diff_2dim_with
 tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_arraylike
 tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_huge_array
 tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_numpy_array
+tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_out_noncontiguous
 tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_0_{axis=0}::test_cumsum_arraylike
 tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_0_{axis=0}::test_cumsum_numpy_array
 tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_1_{axis=1}::test_cumsum_arraylike
@@ -865,6 +898,40 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsExpo
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsExponential_param_2_{scale_shape=(), shape=None}::test_exponential
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGeometric_param_0_{p_shape=(), shape=(4, 3, 2)}::test_geometric
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGeometric_param_1_{p_shape=(), shape=(3, 2)}::test_geometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGeometric_param_2_{p_shape=(3, 2), shape=(4, 3, 2)}::test_geometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGeometric_param_3_{p_shape=(3, 2), shape=(3, 2)}::test_geometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_0_{nbad_shape=(), ngood_shape=(), nsample_dtype=int32, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_1_{nbad_shape=(), ngood_shape=(), nsample_dtype=int32, nsample_shape=(), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_2_{nbad_shape=(), ngood_shape=(), nsample_dtype=int32, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_3_{nbad_shape=(), ngood_shape=(), nsample_dtype=int32, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_4_{nbad_shape=(), ngood_shape=(), nsample_dtype=int64, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_5_{nbad_shape=(), ngood_shape=(), nsample_dtype=int64, nsample_shape=(), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_6_{nbad_shape=(), ngood_shape=(), nsample_dtype=int64, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_7_{nbad_shape=(), ngood_shape=(), nsample_dtype=int64, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_8_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_9_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_10_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_11_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_12_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_13_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_14_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_15_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_16_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int32, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_17_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int32, nsample_shape=(), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_18_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int32, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_19_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int32, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_20_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int64, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_21_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int64, nsample_shape=(), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_22_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int64, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_23_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int64, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_24_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_25_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_26_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_27_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_28_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_29_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(), shape=(3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_30_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_31_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGumbel_param_0_{loc_shape=(), scale_shape=(), shape=(4, 3, 2)}::test_gumbel
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGumbel_param_1_{loc_shape=(), scale_shape=(), shape=(3, 2)}::test_gumbel
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGumbel_param_2_{loc_shape=(), scale_shape=(3, 2), shape=(4, 3, 2)}::test_gumbel
@@ -920,6 +987,10 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsNonc
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsNoncentralF_param_8_{dfden_shape=(3, 2), dfnum_shape=(), nonc_shape=(), shape=(4, 3, 2)}::test_noncentral_f
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsNoncentralF_param_9_{dfden_shape=(3, 2), dfnum_shape=(), nonc_shape=(), shape=(3, 2)}::test_noncentral_f
 
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_0_{lam_shape=(), shape=(4, 3, 2)}::test_poisson
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_1_{lam_shape=(), shape=(3, 2)}::test_poisson
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_2_{lam_shape=(3, 2), shape=(4, 3, 2)}::test_poisson
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_3_{lam_shape=(3, 2), shape=(3, 2)}::test_poisson
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPower_param_0_{a_shape=(), shape=(4, 3, 2)}::test_power
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPower_param_0_{a_shape=(), shape=(4, 3, 2)}::test_power_for_negative_a
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPower_param_1_{a_shape=(), shape=(3, 2)}::test_power
@@ -1076,10 +1147,28 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{ext
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_one_dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_sequence_kth
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_zero_dim
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_axis
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_axis1
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_axis2
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_negative_axis1
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_negative_axis2
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_multi_dim
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_negative_axis
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_none_axis
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim_axis
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim_invalid_axis
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_nan1
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_nan2
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_axis
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_axis1
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_axis2
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_negative_axis1
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_negative_axis2
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_multi_dim
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_negative_axis
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_none_axis
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_zero_dim_axis
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_zero_dim_invalid_axis
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_nan1
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_nan2
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_non_contiguous
@@ -1087,18 +1176,18 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_multi_dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_one_dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis1
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis2
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_kth
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis1
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis2
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_kth
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis1
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis2
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_kth
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis1
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis2
+tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_kth
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_F_order
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_dtype
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_three_or_more_dim
@@ -1245,6 +1334,7 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_h
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_same_value
 
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_weights_mismatch
+tests/third_party/cupy/statistics_tests/test_meanvar.py::TestMeanVar::test_external_mean_axis
 tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_all_nan
 tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_float16
 tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_huge

From b419bcea15983b674a08a6736b22d7008f7a35d0 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 6 Apr 2023 13:42:55 +0200
Subject: [PATCH 077/129] Skip failing tests in test_random and remove --ignore
 flag in pytest

---
 scripts/gen_coverage.py | 2 --
 tests/skipped_tests.tbl | 5 +++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py
index 91505e391624..f191878ab7a8 100644
--- a/scripts/gen_coverage.py
+++ b/scripts/gen_coverage.py
@@ -64,8 +64,6 @@ def run(
             "term-missing",
             "--pyargs",
             "tests",
-            "--ignore",
-            "tests/test_random.py",
             "-vv",
         ],
         cwd=setup_dir,
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 22bbce3616aa..a41b881ae7b3 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -1,5 +1,10 @@
 tests/test_histograms.py::TestHistogram::test_density
 
+tests/test_random.py::TestDistributionsMultivariateNormal::test_moments
+tests/test_random.py::TestDistributionsMultivariateNormal::test_output_shape_check
+tests/test_random.py::TestDistributionsMultivariateNormal::test_seed
+tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.vstack([x, x]).T]
+
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]
 tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22]

From e9cfc003a469b15711cc088af8d99fba452e02ec Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 6 Apr 2023 15:44:50 +0200
Subject: [PATCH 078/129] Add parser for command-line options

---
 .github/workflows/generate_coverage.yaml |  2 +-
 scripts/gen_coverage.py                  | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 3a1e17a8aad3..e353521a24bf 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -49,7 +49,7 @@ jobs:
           conda list
       - name: Build dpnp with coverage
         run: |
-          python scripts/gen_coverage.py
+          python scripts/gen_coverage.py --pytest_opts="--ignore tests/test_random.py"
         env:
           OCL_ICD_FILENAMES: 'libintelocl.so'
       - name: Install coverall dependencies
diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py
index f191878ab7a8..65677f15b3a0 100644
--- a/scripts/gen_coverage.py
+++ b/scripts/gen_coverage.py
@@ -6,6 +6,7 @@ def run(
     c_compiler=None,
     cxx_compiler=None,
     bin_llvm=None,
+    pytest_opts = "",
 ):
 
     IS_LIN = False
@@ -65,6 +66,7 @@ def run(
             "--pyargs",
             "tests",
             "-vv",
+            *pytest_opts.split(),
         ],
         cwd=setup_dir,
         shell=False,
@@ -112,6 +114,21 @@ def find_objects():
         )
 
 if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Driver to build dpnp and generate coverage"
+    )
+    driver = parser.add_argument_group(title="Coverage driver arguments")
+    driver.add_argument(
+        "--pytest-opts",
+        help="Channels through additional pytest options",
+        dest="pytest_opts",
+        default="",
+        type=str,
+    )
+
+    args = parser.parse_args()
 
     c_compiler = "icx"
     cxx_compiler = "icpx"
@@ -124,4 +141,5 @@ def find_objects():
         c_compiler=c_compiler,
         cxx_compiler=cxx_compiler,
         bin_llvm=bin_llvm,
+        pytest_opts = args.pytest_opts,
     )

From ee6d2e42790d06212c75bf7bad7cdbc42bbf199e Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 6 Apr 2023 15:53:55 +0200
Subject: [PATCH 079/129] Fix a mistake: use --pytest-opts in coverage workflow

---
 .github/workflows/generate_coverage.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index e353521a24bf..69fc76bbbb40 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -49,7 +49,7 @@ jobs:
           conda list
       - name: Build dpnp with coverage
         run: |
-          python scripts/gen_coverage.py --pytest_opts="--ignore tests/test_random.py"
+          python scripts/gen_coverage.py --pytest-opts="--ignore tests/test_random.py"
         env:
           OCL_ICD_FILENAMES: 'libintelocl.so'
       - name: Install coverall dependencies

From 67b488c70b6da357cb8cffa017dadcbe152e33c4 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 6 Apr 2023 16:30:46 +0200
Subject: [PATCH 080/129] Change branch name in generate_coverage.yaml

---
 .github/workflows/generate_coverage.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 69fc76bbbb40..cf97f5acfb38 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -2,7 +2,7 @@ name: Generate coverage data for dpnp
 on:
   pull_request:
   push:
-    branches: [add_gen_coverage]
+    branches: [use-skbuild-and-cmake]
 
 jobs:
   generate-coverage:

From a8950c55e8f55ec230ef189d096fe5d8da783bf1 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 6 Apr 2023 19:57:39 +0200
Subject: [PATCH 081/129] Fix remarks in CMakeLists

---
 dpnp/CMakeLists.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
index 89435e4577e9..8309b0422e4e 100644
--- a/dpnp/CMakeLists.txt
+++ b/dpnp/CMakeLists.txt
@@ -30,18 +30,18 @@ function(build_dpnp_cython_ext _trgt _src _dest)
          -P ${CMAKE_SOURCE_DIR}/dpnp/cmake/copy_existing.cmake
      DEPENDS ${_trgt}
      VERBATIM
-           COMMENT "Copying Cython-generated source to dpnp"
+           COMMENT "Copying Cython-generated source for target ${_trgt} to dpnp source layout"
        )
   endif()
   install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest})
 endfunction()
 
 function(build_dpnp_cython_ext_with_backend _trgt _src _dest)
-build_dpnp_cython_ext(${_trgt} ${_src} ${_dest})
-target_link_libraries(${_trgt} dpnp_backend_library)
-if (UNIX)
-  set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH "$ORIGIN/..")
-endif()
+  build_dpnp_cython_ext(${_trgt} ${_src} ${_dest})
+  target_link_libraries(${_trgt} dpnp_backend_library)
+  if (UNIX)
+    set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH "$ORIGIN/..")
+  endif()
 endfunction()
 
 

From a9d79062346393d64abcb3454f7ca257eb0f7095 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 6 Apr 2023 20:05:05 +0200
Subject: [PATCH 082/129] Add --ignore test_strides.py in yaml

---
 .github/workflows/generate_coverage.yaml | 3 ++-
 tests/test_strides.py                    | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index cf97f5acfb38..5e2498758719 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -49,7 +49,8 @@ jobs:
           conda list
       - name: Build dpnp with coverage
         run: |
-          python scripts/gen_coverage.py --pytest-opts="--ignore tests/test_random.py"
+          python scripts/gen_coverage.py --pytest-opts="--ignore tests/test_random.py \
+                                                        --ignore tests/test_strides.py"
         env:
           OCL_ICD_FILENAMES: 'libintelocl.so'
       - name: Install coverall dependencies
diff --git a/tests/test_strides.py b/tests/test_strides.py
index abc80dd4071b..7a859a72285a 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -1,6 +1,6 @@
 import math
 import pytest
-from .helper import get_all_dtypes
+from .helper import get_all_dtypes, is_cpu_device
 
 import dpnp
 

From 9543c233e5e575d42b22131feb9f68cb9ba040a0 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 7 Apr 2023 00:24:35 +0200
Subject: [PATCH 083/129] Unset OCL_ICD_FILENAMES

---
 .github/workflows/generate_coverage.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 5e2498758719..c60512fe3e30 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -51,8 +51,6 @@ jobs:
         run: |
           python scripts/gen_coverage.py --pytest-opts="--ignore tests/test_random.py \
                                                         --ignore tests/test_strides.py"
-        env:
-          OCL_ICD_FILENAMES: 'libintelocl.so'
       - name: Install coverall dependencies
         run: |
           sudo gem install coveralls-lcov

From d2c3fbcb4f101d7b5a91907b1062ecf528377d69 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Date: Fri, 7 Apr 2023 08:51:44 -0500
Subject: [PATCH 084/129] Update scripts/gen_coverage.py

Use `sys.executable` instead of `"python"` in `check_output` call
---
 scripts/gen_coverage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py
index 65677f15b3a0..9fb2d86d0df0 100644
--- a/scripts/gen_coverage.py
+++ b/scripts/gen_coverage.py
@@ -24,7 +24,7 @@ def run(
         )
 
     setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    dpctl_cmake_dir = subprocess.check_output(["python", "-m", "dpctl", "--cmakedir"])
+    dpctl_cmake_dir = subprocess.check_output([sys.executable, "-m", "dpctl", "--cmakedir"])
 
     cmake_args = [
         sys.executable,

From 2dfa8042eb7648a484a66e74bfa08acaf8586097 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Wed, 5 Apr 2023 14:15:18 -0500
Subject: [PATCH 085/129] using rowvar flag in dpnp.cov

---
 dpnp/dpnp_iface_statistics.py | 17 +++++++++++------
 tests/test_statistics.py      | 17 ++++++++++++++++-
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
index ab92f8cc6251..690937ec6bf4 100644
--- a/dpnp/dpnp_iface_statistics.py
+++ b/dpnp/dpnp_iface_statistics.py
@@ -44,6 +44,7 @@
 
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
+from dpnp.dpnp_array import dpnp_array
 import dpnp
 
 
@@ -237,7 +238,8 @@ def correlate(x1, x2, mode='valid'):
 
 
 def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
-    """
+    """cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
+
     Estimate a covariance matrix, given data and weights.
 
     For full documentation refer to :obj:`numpy.cov`.
@@ -248,7 +250,6 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     Dimension of input array ``m`` is limited by ``m.ndim > 2``.
     Size and shape of input arrays are supported to be equal.
     Prameters ``y`` is supported only with default value ``None``.
-    Prameters ``rowvar`` is supported only with default value ``True``.
     Prameters ``bias`` is supported only with default value ``False``.
     Prameters ``ddof`` is supported only with default value ``None``.
     Prameters ``fweights`` is supported only with default value ``None``.
@@ -280,8 +281,6 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
             pass
         elif y is not None:
             pass
-        elif not rowvar:
-            pass
         elif bias:
             pass
         elif ddof is not None:
@@ -291,8 +290,14 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
         elif aweights is not None:
             pass
         else:
-            if x1_desc.dtype != dpnp.float64:
-                x1_desc = dpnp.get_dpnp_descriptor(dpnp.astype(x1, dpnp.float64), copy_when_nondefault_queue=False)
+            if not rowvar and x1.shape[0] != 1:
+                x1 = x1.get_array() if isinstance(x1, dpnp_array) else x1
+                x1 = dpnp_array._create_from_usm_ndarray(x1.mT)
+                x1 = dpnp.astype(x1, dpnp.float64) if x1_desc.dtype != dpnp.float64 else x1
+                x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+            elif x1_desc.dtype != dpnp.float64:
+                x1 = dpnp.astype(x1, dpnp.float64)
+                x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
 
             return dpnp_cov(x1_desc).get_pyobj()
 
diff --git a/tests/test_statistics.py b/tests/test_statistics.py
index 04a765a73bce..f4bea1b4abc4 100644
--- a/tests/test_statistics.py
+++ b/tests/test_statistics.py
@@ -1,5 +1,5 @@
 import pytest
-
+from .helper import get_all_dtypes
 import dpnp
 
 import numpy
@@ -114,3 +114,18 @@ def test_bincount_weights(self, array, weights):
         expected = numpy.bincount(np_a, weights=weights)
         result = dpnp.bincount(dpnp_a, weights=weights)
         numpy.testing.assert_array_equal(expected, result)
+
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True))
+def test_cov_rowvar1(dtype):
+    a = dpnp.array([[0, 2], [1, 1], [2, 0]], dtype=dtype)
+    b = numpy.array([[0, 2], [1, 1], [2, 0]], dtype=dtype)
+    numpy.testing.assert_array_equal(dpnp.cov(a.T), dpnp.cov(a,rowvar=False))
+    numpy.testing.assert_array_equal(numpy.cov(b,rowvar=False), dpnp.cov(a,rowvar=False))
+
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True))
+def test_cov_rowvar2(dtype):
+    a = dpnp.array([[0, 1, 2]], dtype=dtype)
+    b = numpy.array([[0, 1, 2]], dtype=dtype)
+    numpy.testing.assert_array_equal(numpy.cov(b,rowvar=False), dpnp.cov(a,rowvar=False))
+
+

From 1ec95ddeab798cae90ff61bf620b0916d9155dd5 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 15 Feb 2023 05:33:39 -0600
Subject: [PATCH 086/129] OneMKL 2023.1 includes fix for gaussian() with
 box_muller2 method

---
 tests/test_random_state.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/test_random_state.py b/tests/test_random_state.py
index 5ce1b759879b..4c78335771ba 100644
--- a/tests/test_random_state.py
+++ b/tests/test_random_state.py
@@ -473,10 +473,9 @@ def test_distr(self, usm_type):
         data = RandomState(seed, sycl_queue=sycl_queue).randn(3, 2, usm_type=usm_type)
         assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
 
-        # TODO: discuss with oneMKL: return 0.0 instead of the 1st element
         # call with omitted dimensions has to draw the first element from desired
-        # actual = dpnp.asnumpy(RandomState(seed).randn(usm_type=usm_type))
-        # assert_array_almost_equal(actual, desired[0, 0], decimal=precision)
+        actual = dpnp.asnumpy(RandomState(seed).randn(usm_type=usm_type))
+        assert_array_almost_equal(actual, desired[0, 0], decimal=precision)
 
         # randn() is an alias on standard_normal(), map arguments
         with mock.patch('dpnp.random.RandomState.standard_normal') as m:
@@ -622,10 +621,13 @@ def test_distr(self, usm_type):
         precision = numpy.finfo(dtype=numpy.float32).precision
         assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
 
-        # TODO: discuss with oneMKL: return 0.0 instead of the 1st element
+        # call with the same seed has to draw the same values
+        data = RandomState(seed, sycl_queue=sycl_queue).standard_normal(size=(4, 2), usm_type=usm_type)
+        assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
+
         # call with omitted dimensions has to draw the first element from desired
-        # actual = dpnp.asnumpy(RandomState(seed).standard_normal(usm_type=usm_type))
-        # assert_array_almost_equal(actual, desired[0, 0], decimal=precision)
+        actual = dpnp.asnumpy(RandomState(seed).standard_normal(usm_type=usm_type))
+        assert_array_almost_equal(actual, desired[0, 0], decimal=precision)
 
         # random_sample() is an alias on uniform(), map arguments
         with mock.patch('dpnp.random.RandomState.normal') as m:

From 9cb5848d138c4d5da35775a92e2e60abbe63d4c0 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 26 Apr 2023 13:16:58 +0200
Subject: [PATCH 087/129] Rename a variable with numpy array to 'expected' in
 tests

---
 tests/test_random_state.py | 120 ++++++++++++++++++-------------------
 1 file changed, 60 insertions(+), 60 deletions(-)

diff --git a/tests/test_random_state.py b/tests/test_random_state.py
index 4c78335771ba..e65e26b4a8ea 100644
--- a/tests/test_random_state.py
+++ b/tests/test_random_state.py
@@ -57,15 +57,15 @@ def test_distr(self, dtype, usm_type):
 
         # default dtype depends on fp64 support by the device
         dtype = get_default_floating() if dtype is None else dtype
-        desired = numpy.array([[0.428205496031286, -0.55383273779227 ],
-                               [2.027017795643378,  4.318888073163015],
-                               [2.69080893259102,  -1.047967253719708]], dtype=dtype)
+        expected = numpy.array([[0.428205496031286, -0.55383273779227 ],
+                                [2.027017795643378,  4.318888073163015],
+                                [2.69080893259102,  -1.047967253719708]], dtype=dtype)
 
         # TODO: discuss with opneMKL: there is a difference between CPU and GPU
         # generated samples since 9 digit while precision=15 for float64
         # precision = numpy.finfo(dtype=dtype).precision
         precision = 8 if dtype == dpnp.float64 else numpy.finfo(dtype=dtype).precision
-        assert_array_almost_equal(dpnp_data.asnumpy(), desired, decimal=precision)
+        assert_array_almost_equal(dpnp_data.asnumpy(), expected, decimal=precision)
 
         # check if compute follows data isn't broken
         assert_cfd(dpnp_data, sycl_queue, usm_type)
@@ -162,11 +162,11 @@ def test_fallback(self, loc, scale):
 
         # dpnp accepts only scalar as low and/or high, in other cases it will be a fallback to numpy
         actual = data.asnumpy()
-        desired = numpy.random.RandomState(seed).normal(loc=loc, scale=scale, size=size)
+        expected = numpy.random.RandomState(seed).normal(loc=loc, scale=scale, size=size)
 
         dtype = get_default_floating()
         precision = numpy.finfo(dtype=dtype).precision
-        assert_array_almost_equal(actual, desired, decimal=precision)
+        assert_array_almost_equal(actual, expected, decimal=precision)
 
         # check if compute follows data isn't broken
         assert_cfd(data, sycl_queue)
@@ -200,22 +200,22 @@ def test_distr(self, usm_type):
         dtype = get_default_floating()
 
         data = RandomState(seed, sycl_queue=sycl_queue).rand(3, 2, usm_type=usm_type)
-        desired = numpy.array([[0.7592552667483687, 0.5937560645397753],
-                               [0.257010098779574 , 0.749422621447593 ],
-                               [0.6316644293256104, 0.7411410815548152]], dtype=dtype)
+        expected = numpy.array([[0.7592552667483687, 0.5937560645397753],
+                                [0.257010098779574 , 0.749422621447593 ],
+                                [0.6316644293256104, 0.7411410815548152]], dtype=dtype)
 
         precision = numpy.finfo(dtype=numpy.float64).precision
-        assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
+        assert_array_almost_equal(data.asnumpy(), expected, decimal=precision)
         assert_cfd(data, sycl_queue, usm_type)
 
         # call with the same seed has to draw the same values
         data = RandomState(seed, sycl_queue=sycl_queue).rand(3, 2, usm_type=usm_type)
-        assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
+        assert_array_almost_equal(data.asnumpy(), expected, decimal=precision)
         assert_cfd(data, sycl_queue, usm_type)
 
-        # call with omitted dimensions has to draw the first element from desired
+        # call with omitted dimensions has to draw the first element from expected
         data = RandomState(seed, sycl_queue=sycl_queue).rand(usm_type=usm_type)
-        assert_array_almost_equal(data.asnumpy(), desired[0, 0], decimal=precision)
+        assert_array_almost_equal(data.asnumpy(), expected[0, 0], decimal=precision)
         assert_cfd(data, sycl_queue, usm_type)
 
         # rand() is an alias on random_sample(), map arguments
@@ -276,10 +276,10 @@ def test_distr(self, dtype, usm_type):
                                                                 size=(3, 2),
                                                                 dtype=dtype,
                                                                 usm_type=usm_type)
-        desired = numpy.array([[4, 1],
-                               [5, 3],
-                               [5, 7]], dtype=numpy.int32)
-        assert_array_equal(data.asnumpy(), desired)
+        expected = numpy.array([[4, 1],
+                                [5, 3],
+                                [5, 7]], dtype=numpy.int32)
+        assert_array_equal(data.asnumpy(), expected)
         assert_cfd(data, sycl_queue, usm_type)
 
         # call with the same seed has to draw the same values
@@ -288,15 +288,15 @@ def test_distr(self, dtype, usm_type):
                                                                 size=(3, 2),
                                                                 dtype=dtype,
                                                                 usm_type=usm_type)
-        assert_array_equal(data.asnumpy(), desired)
+        assert_array_equal(data.asnumpy(), expected)
         assert_cfd(data, sycl_queue, usm_type)
 
-        # call with omitted dimensions has to draw the first element from desired
+        # call with omitted dimensions has to draw the first element from expected
         data = RandomState(seed, sycl_queue=sycl_queue).randint(low=low,
                                                                 high=high,
                                                                 dtype=dtype,
                                                                 usm_type=usm_type)
-        assert_array_equal(data.asnumpy(), desired[0, 0])
+        assert_array_equal(data.asnumpy(), expected[0, 0])
         assert_cfd(data, sycl_queue, usm_type)
 
         # rand() is an alias on random_sample(), map arguments
@@ -311,15 +311,15 @@ def test_distr(self, dtype, usm_type):
 
     def test_float_bounds(self):
         actual = RandomState(365852).randint(low=0.6, high=6.789102534, size=(7,)).asnumpy()
-        desired = numpy.array([4, 4, 3, 3, 1, 0, 3], dtype=numpy.int32)
-        assert_array_equal(actual, desired)
+        expected = numpy.array([4, 4, 3, 3, 1, 0, 3], dtype=numpy.int32)
+        assert_array_equal(actual, expected)
 
 
     def test_negative_bounds(self):
         actual = RandomState(5143).randint(low=-15.74, high=-3, size=(2, 7)).asnumpy()
-        desired = numpy.array([[-9, -12, -4,  -12, -5, -13, -9],
-                               [-4, -6,  -13, -9,  -9,  -6, -15]], dtype=numpy.int32)
-        assert_array_equal(actual, desired)
+        expected = numpy.array([[-9, -12, -4,  -12, -5, -13, -9],
+                                [-4, -6,  -13, -9,  -9,  -6, -15]], dtype=numpy.int32)
+        assert_array_equal(actual, expected)
 
 
     def test_negative_interval(self):
@@ -417,8 +417,8 @@ def test_bounds_fallback(self, low, high):
 
         # dpnp accepts only scalar as low and/or high, in other cases it will be a fallback to numpy
         actual = RandomState(seed).randint(low=low, high=high, size=size).asnumpy()
-        desired = numpy.random.RandomState(seed).randint(low=low, high=high, size=size)
-        assert_equal(actual, desired)
+        expected = numpy.random.RandomState(seed).randint(low=low, high=high, size=size)
+        assert_equal(actual, expected)
 
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -436,8 +436,8 @@ def test_dtype_fallback(self, dtype):
 
         # dtype must be int or dpnp.int32, in other cases it will be a fallback to numpy
         actual = RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype).asnumpy()
-        desired = numpy.random.RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype)
-        assert_equal(actual, desired)
+        expected = numpy.random.RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype)
+        assert_equal(actual, expected)
         assert_raises(TypeError, RandomState().randint, dtype=dtype)
 
 
@@ -459,7 +459,7 @@ def test_distr(self, usm_type):
         dtype = get_default_floating()
 
         data = RandomState(seed, sycl_queue=sycl_queue).randn(3, 2, usm_type=usm_type)
-        desired = numpy.array([[-0.862485623762009,  1.169492612490272],
+        expected = numpy.array([[-0.862485623762009,  1.169492612490272],
                                 [-0.405876118480338,  0.939006537666719],
                                 [-0.615075625641019,  0.555260469834381]], dtype=dtype)
 
@@ -467,15 +467,15 @@ def test_distr(self, usm_type):
         # generated samples since 9 digit while precision=15 for float64
         # precision = numpy.finfo(dtype=numpy.float64).precision
         precision = numpy.finfo(dtype=numpy.float32).precision
-        assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
+        assert_array_almost_equal(data.asnumpy(), expected, decimal=precision)
 
         # call with the same seed has to draw the same values
         data = RandomState(seed, sycl_queue=sycl_queue).randn(3, 2, usm_type=usm_type)
-        assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
+        assert_array_almost_equal(data.asnumpy(), expected, decimal=precision)
 
-        # call with omitted dimensions has to draw the first element from desired
+        # call with omitted dimensions has to draw the first element from expected
         actual = dpnp.asnumpy(RandomState(seed).randn(usm_type=usm_type))
-        assert_array_almost_equal(actual, desired[0, 0], decimal=precision)
+        assert_array_almost_equal(actual, expected[0, 0], decimal=precision)
 
         # randn() is an alias on standard_normal(), map arguments
         with mock.patch('dpnp.random.RandomState.standard_normal') as m:
@@ -610,24 +610,24 @@ def test_distr(self, usm_type):
         dtype = get_default_floating()
 
         data = RandomState(seed, sycl_queue=sycl_queue).standard_normal(size=(4, 2), usm_type=usm_type)
-        desired = numpy.array([[0.112455902594571, -0.249919829443642],
-                               [0.702423540827815,  1.548132130318456],
-                               [0.947364919775284, -0.432257289195464],
-                               [0.736848611436872,  1.557284323302839]], dtype=dtype)
+        expected = numpy.array([[0.112455902594571, -0.249919829443642],
+                                [0.702423540827815,  1.548132130318456],
+                                [0.947364919775284, -0.432257289195464],
+                                [0.736848611436872,  1.557284323302839]], dtype=dtype)
 
         # TODO: discuss with opneMKL: there is a difference between CPU and GPU
         # generated samples since 9 digit while precision=15 for float64
         # precision = numpy.finfo(dtype=numpy.float64).precision
         precision = numpy.finfo(dtype=numpy.float32).precision
-        assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
+        assert_array_almost_equal(data.asnumpy(), expected, decimal=precision)
 
         # call with the same seed has to draw the same values
         data = RandomState(seed, sycl_queue=sycl_queue).standard_normal(size=(4, 2), usm_type=usm_type)
-        assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
+        assert_array_almost_equal(data.asnumpy(), expected, decimal=precision)
 
-        # call with omitted dimensions has to draw the first element from desired
+        # call with omitted dimensions has to draw the first element from expected
         actual = dpnp.asnumpy(RandomState(seed).standard_normal(usm_type=usm_type))
-        assert_array_almost_equal(actual, desired[0, 0], decimal=precision)
+        assert_array_almost_equal(actual, expected[0, 0], decimal=precision)
 
         # random_sample() is an alias on uniform(), map arguments
         with mock.patch('dpnp.random.RandomState.normal') as m:
@@ -670,17 +670,17 @@ def test_distr(self, usm_type):
         dtype = get_default_floating()
 
         data = RandomState(seed, sycl_queue=sycl_queue).random_sample(size=(4, 2), usm_type=usm_type)
-        desired = numpy.array([[0.1887628440745175, 0.2763057765550911],
-                               [0.3973943444434553, 0.2975987731479108],
-                               [0.4144027342554182, 0.2636592474300414],
-                               [0.6129623607266694, 0.2596735346596688]], dtype=dtype)
+        expected = numpy.array([[0.1887628440745175, 0.2763057765550911],
+                                [0.3973943444434553, 0.2975987731479108],
+                                [0.4144027342554182, 0.2636592474300414],
+                                [0.6129623607266694, 0.2596735346596688]], dtype=dtype)
         
         precision = numpy.finfo(dtype=dtype).precision
-        assert_array_almost_equal(data.asnumpy(), desired, decimal=precision)
+        assert_array_almost_equal(data.asnumpy(), expected, decimal=precision)
 
-        # call with omitted dimensions has to draw the first element from desired
+        # call with omitted dimensions has to draw the first element from expected
         data = RandomState(seed, sycl_queue=sycl_queue).random_sample(usm_type=usm_type)
-        assert_array_almost_equal(data.asnumpy(), desired[0, 0], decimal=precision)
+        assert_array_almost_equal(data.asnumpy(), expected[0, 0], decimal=precision)
 
         # random_sample() is an alias on uniform(), map arguments
         with mock.patch('dpnp.random.RandomState.uniform') as m:
@@ -747,15 +747,15 @@ def test_distr(self, bounds, dtype, usm_type):
         # default dtype depends on fp64 support by the device
         dtype = get_default_floating() if dtype is None else dtype
         if dtype != dpnp.int32:
-            desired = numpy.array([[4.023770128630567, 8.87456423597643 ],
-                                   [2.888630247435067, 4.823004481580574],
-                                   [2.030351535445079, 4.533497077834326]])
-            assert_array_almost_equal(actual, desired, decimal=numpy.finfo(dtype=dtype).precision)
+            expected = numpy.array([[4.023770128630567, 8.87456423597643 ],
+                                    [2.888630247435067, 4.823004481580574],
+                                    [2.030351535445079, 4.533497077834326]])
+            assert_array_almost_equal(actual, expected, decimal=numpy.finfo(dtype=dtype).precision)
         else:
-            desired = numpy.array([[3, 8],
+            expected = numpy.array([[3, 8],
                                    [2, 4],
                                    [1, 4]])
-            assert_array_equal(actual, desired)
+            assert_array_equal(actual, expected)
 
         # check if compute follows data isn't broken
         assert_cfd(dpnp_data, sycl_queue, usm_type)
@@ -784,12 +784,12 @@ def test_low_high_equal(self, dtype, usm_type):
 
         # default dtype depends on fp64 support by the device
         dtype = get_default_floating() if dtype is None else dtype
-        desired = numpy.full(shape=shape, fill_value=low, dtype=dtype)
+        expected = numpy.full(shape=shape, fill_value=low, dtype=dtype)
 
         if dtype == dpnp.int32:
-            assert_array_equal(actual, desired)
+            assert_array_equal(actual, expected)
         else:
-            assert_array_almost_equal(actual, desired, decimal=numpy.finfo(dtype=dtype).precision)
+            assert_array_almost_equal(actual, expected, decimal=numpy.finfo(dtype=dtype).precision)
 
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -824,11 +824,11 @@ def test_fallback(self, low, high):
 
         # dpnp accepts only scalar as low and/or high, in other cases it will be a fallback to numpy
         actual = data.asnumpy()
-        desired = numpy.random.RandomState(seed).uniform(low=low, high=high, size=size)
+        expected = numpy.random.RandomState(seed).uniform(low=low, high=high, size=size)
 
         dtype = get_default_floating()
         precision = numpy.finfo(dtype=dtype).precision
-        assert_array_almost_equal(actual, desired, decimal=precision)
+        assert_array_almost_equal(actual, expected, decimal=precision)
 
         # check if compute follows data isn't broken
         assert_cfd(data, sycl_queue)

From 1a3864e099f5b753ce22ad0e75d17a139693f168 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Wed, 19 Apr 2023 17:40:19 -0500
Subject: [PATCH 088/129] fix_cov_for_no_fp64

---
 dpnp/backend/kernels/dpnp_krnl_statistics.cpp |  4 +-
 dpnp/dpnp_algo/dpnp_algo_statistics.pxi       |  1 -
 dpnp/dpnp_iface_statistics.py                 | 55 +++++++++----------
 tests/test_statistics.py                      |  4 +-
 .../cupy/statistics_tests/test_correlation.py |  2 +-
 5 files changed, 32 insertions(+), 34 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp
index abf77ff25eec..eaaf6b72f89f 100644
--- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp
@@ -192,7 +192,7 @@ DPCTLSyclEventRef dpnp_cov_c(DPCTLSyclQueueRef q_ref,
                                 nrows,                            // std::int64_t n,
                                 ncols,                            // std::int64_t k,
                                 alpha,                            // T alpha,
-                                temp,                             //const T* a,
+                                temp,                             // const T* a,
                                 ncols,                            // std::int64_t lda,
                                 beta,                             // T beta,
                                 result,                           // T* c,
@@ -1384,7 +1384,7 @@ void func_map_init_statistics(func_map_t& fmap)
 
     fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
     fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
-    fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
+    fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cov_ext_c<float>};
     fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
 
     fmap[DPNPFuncName::DPNP_FN_MAX][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_max_default_c<int32_t>};
diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi
index 6131d292bf93..d2868a8ee042 100644
--- a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi
@@ -179,7 +179,6 @@ cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_
     return result
 
 
-# supports "double" input only
 cpdef utils.dpnp_descriptor dpnp_cov(utils.dpnp_descriptor array1):
     cdef shape_type_c input_shape = array1.shape
 
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
index 690937ec6bf4..966a72142695 100644
--- a/dpnp/dpnp_iface_statistics.py
+++ b/dpnp/dpnp_iface_statistics.py
@@ -41,7 +41,7 @@
 
 
 import numpy
-
+import dpctl.tensor as dpt
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
 from dpnp.dpnp_array import dpnp_array
@@ -247,7 +247,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     Limitations
     -----------
     Input array ``m`` is supported as :obj:`dpnp.ndarray`.
-    Dimension of input array ``m`` is limited by ``m.ndim > 2``.
+    Dimension of input array ``m`` is limited by ``m.ndim <= 2``.
     Size and shape of input arrays are supported to be equal.
     Prameters ``y`` is supported only with default value ``None``.
     Prameters ``bias`` is supported only with default value ``False``.
@@ -257,7 +257,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
-    .. seealso:: :obj:`dpnp.corrcoef` normalized covariance matrix.
+    .. seealso:: :obj:`dpnp.corrcoef` normalized covariance matrix.
 
     Examples
     --------
@@ -274,31 +274,30 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     [1.0, -1.0, -1.0, 1.0]
 
     """
-
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    if x1_desc:
-        if x1_desc.ndim > 2:
-            pass
-        elif y is not None:
-            pass
-        elif bias:
-            pass
-        elif ddof is not None:
-            pass
-        elif fweights is not None:
-            pass
-        elif aweights is not None:
-            pass
-        else:
-            if not rowvar and x1.shape[0] != 1:
-                x1 = x1.get_array() if isinstance(x1, dpnp_array) else x1
-                x1 = dpnp_array._create_from_usm_ndarray(x1.mT)
-                x1 = dpnp.astype(x1, dpnp.float64) if x1_desc.dtype != dpnp.float64 else x1
-                x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-            elif x1_desc.dtype != dpnp.float64:
-                x1 = dpnp.astype(x1, dpnp.float64)
-                x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-
+    if not isinstance(x1, (dpnp_array, dpt.usm_ndarray)):
+        pass
+    elif x1.ndim > 2:
+        pass
+    elif y is not None:
+        pass
+    elif bias:
+        pass
+    elif ddof is not None:
+        pass
+    elif fweights is not None:
+        pass
+    elif aweights is not None:
+        pass
+    else:
+        if not rowvar and x1.shape[0] != 1:
+            x1 = x1.get_array() if isinstance(x1, dpnp_array) else x1
+            x1 = dpnp_array._create_from_usm_ndarray(x1.mT)
+
+        if not x1.dtype in (dpnp.float32, dpnp.float64):
+            x1 = dpnp.astype(x1, dpnp.default_float_type(sycl_queue=x1.sycl_queue))
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+        if x1_desc:
             return dpnp_cov(x1_desc).get_pyobj()
 
     return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights)
diff --git a/tests/test_statistics.py b/tests/test_statistics.py
index f4bea1b4abc4..d9879244f098 100644
--- a/tests/test_statistics.py
+++ b/tests/test_statistics.py
@@ -116,14 +116,14 @@ def test_bincount_weights(self, array, weights):
         numpy.testing.assert_array_equal(expected, result)
 
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True))
-def test_cov_rowvar1(dtype):
+def test_cov_rowvar(dtype):
     a = dpnp.array([[0, 2], [1, 1], [2, 0]], dtype=dtype)
     b = numpy.array([[0, 2], [1, 1], [2, 0]], dtype=dtype)
     numpy.testing.assert_array_equal(dpnp.cov(a.T), dpnp.cov(a,rowvar=False))
     numpy.testing.assert_array_equal(numpy.cov(b,rowvar=False), dpnp.cov(a,rowvar=False))
 
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True))
-def test_cov_rowvar2(dtype):
+def test_cov_1D_rowvar(dtype):
     a = dpnp.array([[0, 1, 2]], dtype=dtype)
     b = numpy.array([[0, 1, 2]], dtype=dtype)
     numpy.testing.assert_array_equal(numpy.cov(b,rowvar=False), dpnp.cov(a,rowvar=False))
diff --git a/tests/third_party/cupy/statistics_tests/test_correlation.py b/tests/third_party/cupy/statistics_tests/test_correlation.py
index 3c68a998b5ad..b726951373af 100644
--- a/tests/third_party/cupy/statistics_tests/test_correlation.py
+++ b/tests/third_party/cupy/statistics_tests/test_correlation.py
@@ -48,7 +48,7 @@ def generate_input(self, a_shape, y_shape, xp, dtype):
         return a, y
 
     @testing.for_all_dtypes()
-    @testing.numpy_cupy_allclose()
+    @testing.numpy_cupy_allclose(type_check=False)
     def check(self, a_shape, y_shape=None, rowvar=True, bias=False,
               ddof=None, xp=None, dtype=None):
         a, y = self.generate_input(a_shape, y_shape, xp, dtype)

From 13575a3437f6598e41874e056a4ec2b231065e27 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Thu, 20 Apr 2023 09:40:42 -0500
Subject: [PATCH 089/129] Use internal dpnp kernel instead of MKL if no FP64
 support

---
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 52 ++++++++++++---------
 dpnp/backend/src/dpnp_fptr.hpp              |  2 +-
 2 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index 0f691a03ab60..e4c6cf5c16b2 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -138,7 +138,7 @@
         else                                                                                                           \
         {                                                                                                              \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                          \
+                size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                        \
                 {                                                                                                      \
                     const _DataType_output input_elem = input1_data[output_id];                                        \
                     result[output_id] = __operation1__;                                                                \
@@ -149,16 +149,17 @@
                     gws, kernel_parallel_for_func);                                                                    \
             };                                                                                                         \
                                                                                                                        \
-            if constexpr ((std::is_same<_DataType_input, double>::value ||                                             \
-                           std::is_same<_DataType_input, float>::value) &&                                             \
-                          std::is_same<_DataType_input, _DataType_output>::value)                                      \
-            {                                                                                                          \
-                event = __operation2__;                                                                                \
-            }                                                                                                          \
-            else                                                                                                       \
+            if constexpr (both_types_are_same<_DataType_input, _DataType_output, float, double>)                       \
             {                                                                                                          \
-                event = q.submit(kernel_func);                                                                         \
+                if (q.get_device().has(sycl::aspect::fp64))                                                            \
+                {                                                                                                      \
+                    event = __operation2__;                                                                            \
+                                                                                                                       \
+                    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                           \
+                    return DPCTLEvent_Copy(event_ref);                                                                 \
+                }                                                                                                      \
             }                                                                                                          \
+            event = q.submit(kernel_func);                                                                             \
         }                                                                                                              \
                                                                                                                        \
         event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
@@ -671,14 +672,17 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
                 cgh.parallel_for<class __name__##_kernel<_DataType>>(gws, kernel_parallel_for_func);                   \
             };                                                                                                         \
                                                                                                                        \
-            if constexpr (std::is_same<_DataType, double>::value || std::is_same<_DataType, float>::value)             \
-            {                                                                                                          \
-                event = __operation2__;                                                                                \
-            }                                                                                                          \
-            else                                                                                                       \
+            if constexpr (is_any_v<_DataType, float, double>)                                                          \
             {                                                                                                          \
-                event = q.submit(kernel_func);                                                                         \
+                if (q.get_device().has(sycl::aspect::fp64))                                                            \
+                {                                                                                                      \
+                    event = __operation2__;                                                                            \
+                                                                                                                       \
+                    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                           \
+                    return DPCTLEvent_Copy(event_ref);                                                                 \
+                }                                                                                                      \
             }                                                                                                          \
+            event = q.submit(kernel_func);                                                                             \
         }                                                                                                              \
                                                                                                                        \
         event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
@@ -848,7 +852,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     return;
 }
 
-
 #define MACRO_2ARG_3TYPES_OP(                                                                                          \
     __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__)                       \
     template <typename _KernelNameSpecialization1,                                                                     \
@@ -1030,12 +1033,19 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
         {                                                                                                              \
             if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __mkl_types__>)                      \
             {                                                                                                          \
-                event = __mkl_operation__(q, result_size, input1_data, input2_data, result);                           \
+                if (q.get_device().has(sycl::aspect::fp64))                                                            \
+                {                                                                                                      \
+                    event = __mkl_operation__(q, result_size, input1_data, input2_data, result);                       \
+                                                                                                                       \
+                    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                           \
+                    return DPCTLEvent_Copy(event_ref);                                                                 \
+                }                                                                                                      \
             }                                                                                                          \
-            else if constexpr (none_of_both_types<_DataType_input1,                                                    \
-                                                  _DataType_input2,                                                    \
-                                                  std::complex<float>,                                                 \
-                                                  std::complex<double>>)                                               \
+                                                                                                                       \
+            if constexpr (none_of_both_types<_DataType_input1,                                                         \
+                                             _DataType_input2,                                                         \
+                                             std::complex<float>,                                                      \
+                                             std::complex<double>>)                                                    \
             {                                                                                                          \
                 constexpr size_t lws = 64;                                                                             \
                 constexpr unsigned int vec_sz = 8;                                                                     \
diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp
index d6c48784e6bd..9f8c102bca5e 100644
--- a/dpnp/backend/src/dpnp_fptr.hpp
+++ b/dpnp/backend/src/dpnp_fptr.hpp
@@ -164,7 +164,7 @@ template <typename T, typename... Ts>
 struct are_same : std::conjunction<std::is_same<T, Ts>...> {};
 
 /**
- * A template constant to check if type T matces any type from Ts.
+ * A template constant to check if type T matches any type from Ts.
  */
 template <typename T, typename... Ts>
 constexpr auto is_any_v = is_any<T, Ts...>::value;

From 69eae318fc2ea563b39bc65d4a2df4a9b614e592 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Tue, 25 Apr 2023 17:22:49 +0200
Subject: [PATCH 090/129] Get rid of default casting to double while invoking
 sycl::erf() and sycl::sign()

---
 .../include/dpnp_gen_1arg_1type_tbl.hpp       |  6 ++--
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp   | 34 ++++++++++++++++++-
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
index 0f6cb5b31deb..beb4fb427de9 100644
--- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp
@@ -91,14 +91,14 @@
 MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), q.submit(kernel_func))
 MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, q.submit(kernel_func))
 MACRO_1ARG_1TYPE_OP(dpnp_erf_c,
-                    sycl::erf((double)input_elem),
-                    oneapi::mkl::vm::erf(q, input1_size, input1_data, result)) // no sycl::erf for int and long
+                    dispatch_erf_op(input_elem),
+                    oneapi::mkl::vm::erf(q, input1_size, input1_data, result))
 MACRO_1ARG_1TYPE_OP(dpnp_negative_c, -input_elem, q.submit(kernel_func))
 MACRO_1ARG_1TYPE_OP(dpnp_recip_c,
                     _DataType(1) / input_elem,
                     q.submit(kernel_func)) // error: no member named 'recip' in namespace 'sycl'
 MACRO_1ARG_1TYPE_OP(dpnp_sign_c,
-                    sycl::sign((double)input_elem),
+                    dispatch_sign_op(input_elem),
                     q.submit(kernel_func)) // no sycl::sign for int and long
 MACRO_1ARG_1TYPE_OP(dpnp_square_c,
                     input_elem* input_elem,
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index e4c6cf5c16b2..b5418867fd05 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -559,6 +559,37 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
     return;
 }
 
+template <typename T>
+constexpr auto dispatch_erf_op(T elem)
+{
+    if constexpr (is_any_v<T, std::int32_t, std::int64_t>)
+    {
+        // TODO: need to convert to double when possible?
+        return sycl::erf((float)elem);
+    }
+    else
+    {
+        return sycl::erf(elem);
+    }
+}
+
+template <typename T>
+constexpr auto dispatch_sign_op(T elem)
+{
+    if constexpr (is_any_v<T, std::int32_t, std::int64_t>)
+    {
+        if (elem > 0)
+            return T(1);
+        if (elem < 0)
+            return T(-1);
+        return elem; // elem is 0
+    }
+    else
+    {
+        return sycl::sign(elem);
+    }
+}
+
 #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__)                                                  \
     template <typename _KernelNameSpecialization>                                                                      \
     class __name__##_kernel;                                                                                           \
@@ -594,6 +625,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
         }                                                                                                              \
                                                                                                                        \
         sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));                                                      \
+        const bool has_fp64_aspect = q.get_device().has(sycl::aspect::fp64);                                           \
                                                                                                                        \
         _DataType* input1_data = static_cast<_DataType*>(const_cast<void*>(input1_in));                                \
         _DataType* result = static_cast<_DataType*>(result_out);                                                       \
@@ -674,7 +706,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
                                                                                                                        \
             if constexpr (is_any_v<_DataType, float, double>)                                                          \
             {                                                                                                          \
-                if (q.get_device().has(sycl::aspect::fp64))                                                            \
+                if (has_fp64_aspect)                                                                                   \
                 {                                                                                                      \
                     event = __operation2__;                                                                            \
                                                                                                                        \

From 84b9f99e30e2a1b6e6cea71b04b2ce0318b60916 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 26 Apr 2023 21:06:13 +0200
Subject: [PATCH 091/129] Update test for dpnp.erf() with strides

---
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp |  9 ++++-----
 tests/test_strides.py                       | 11 ++++-------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index b5418867fd05..50440cdebc81 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -560,11 +560,11 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
 }
 
 template <typename T>
-constexpr auto dispatch_erf_op(T elem)
+constexpr T dispatch_erf_op(T elem)
 {
     if constexpr (is_any_v<T, std::int32_t, std::int64_t>)
     {
-        // TODO: need to convert to double when possible?
+        // TODO: need to convert to double when possible
         return sycl::erf((float)elem);
     }
     else
@@ -574,7 +574,7 @@ constexpr auto dispatch_erf_op(T elem)
 }
 
 template <typename T>
-constexpr auto dispatch_sign_op(T elem)
+constexpr T dispatch_sign_op(T elem)
 {
     if constexpr (is_any_v<T, std::int32_t, std::int64_t>)
     {
@@ -625,7 +625,6 @@ constexpr auto dispatch_sign_op(T elem)
         }                                                                                                              \
                                                                                                                        \
         sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));                                                      \
-        const bool has_fp64_aspect = q.get_device().has(sycl::aspect::fp64);                                           \
                                                                                                                        \
         _DataType* input1_data = static_cast<_DataType*>(const_cast<void*>(input1_in));                                \
         _DataType* result = static_cast<_DataType*>(result_out);                                                       \
@@ -706,7 +705,7 @@ constexpr auto dispatch_sign_op(T elem)
                                                                                                                        \
             if constexpr (is_any_v<_DataType, float, double>)                                                          \
             {                                                                                                          \
-                if (has_fp64_aspect)                                                                                   \
+                if (q.get_device().has(sycl::aspect::fp64))                                                            \
                 {                                                                                                      \
                     event = __operation2__;                                                                            \
                                                                                                                        \
diff --git a/tests/test_strides.py b/tests/test_strides.py
index 7a859a72285a..84449db23d61 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -68,19 +68,16 @@ def test_strides_1arg(func_name, dtype, shape):
                          [(10,)],
                          ids=["(10,)"])
 def test_strides_erf(dtype, shape):
-    a = numpy.arange(numpy.prod(shape), dtype=dtype).reshape(shape)
+    a = dpnp.reshape(dpnp.linspace(-1, 1, num=numpy.prod(shape), dtype=dtype), shape)
     b = a[::2]
 
-    dpa = dpnp.reshape(dpnp.arange(numpy.prod(shape), dtype=dtype), shape)
-    dpb = dpa[::2]
-
-    result = dpnp.erf(dpb)
+    result = dpnp.erf(b)
 
-    expected = numpy.empty_like(b)
+    expected = numpy.empty_like(b.asnumpy())
     for idx, val in enumerate(b):
         expected[idx] = math.erf(val)
 
-    assert_allclose(result, expected)
+    assert_allclose(result, expected, rtol=1e-06)
 
 
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))

From 153f1ca3e76e30b87956676ebbaecf3dec4cd9f7 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 19 Apr 2023 07:44:06 -0500
Subject: [PATCH 092/129] Add dpnp.linalg.eigh() function

---
 .gitignore                                    |   1 +
 dpnp/CMakeLists.txt                           |   1 +
 dpnp/backend/CMakeLists.txt                   |   2 +-
 dpnp/backend/extensions/lapack/CMakeLists.txt |  75 ++++++
 dpnp/backend/extensions/lapack/heevd.cpp      | 210 +++++++++++++++++
 dpnp/backend/extensions/lapack/heevd.hpp      |  51 +++++
 dpnp/backend/extensions/lapack/lapack_py.cpp  |  57 +++++
 dpnp/backend/extensions/lapack/syevd.cpp      | 214 ++++++++++++++++++
 dpnp/backend/extensions/lapack/syevd.hpp      |  51 +++++
 dpnp/dpnp_iface.py                            |  45 +++-
 dpnp/linalg/dpnp_iface_linalg.py              |  67 ++++++
 dpnp/linalg/dpnp_utils_linalg.py              | 134 +++++++++++
 tests/skipped_tests_gpu.tbl                   |  12 +
 tests/test_linalg.py                          |  47 +++-
 tests/test_sycl_queue.py                      |  32 +++
 .../cupy/creation_tests/test_ranges.py        |   2 +-
 .../cupy/linalg_tests/test_eigenvalue.py      | 199 ++++++++++++++++
 tests/third_party/cupy/testing/helper.py      |  28 ++-
 18 files changed, 1216 insertions(+), 12 deletions(-)
 create mode 100644 dpnp/backend/extensions/lapack/CMakeLists.txt
 create mode 100644 dpnp/backend/extensions/lapack/heevd.cpp
 create mode 100644 dpnp/backend/extensions/lapack/heevd.hpp
 create mode 100644 dpnp/backend/extensions/lapack/lapack_py.cpp
 create mode 100644 dpnp/backend/extensions/lapack/syevd.cpp
 create mode 100644 dpnp/backend/extensions/lapack/syevd.hpp
 create mode 100644 dpnp/linalg/dpnp_utils_linalg.py
 create mode 100644 tests/third_party/cupy/linalg_tests/test_eigenvalue.py

diff --git a/.gitignore b/.gitignore
index 8beb38f1efd6..ea9f2cba333d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # CMake build and local install directory
 _skbuild
+build
 build_cython
 dpnp.egg-info
 
diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
index 8309b0422e4e..54be4eb23b9a 100644
--- a/dpnp/CMakeLists.txt
+++ b/dpnp/CMakeLists.txt
@@ -47,6 +47,7 @@ endfunction()
 
 build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.pyx dpnp)
 add_subdirectory(backend)
+add_subdirectory(backend/extensions/lapack)
 
 add_subdirectory(dpnp_algo)
 add_subdirectory(dpnp_utils)
diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 894d3b6e72d4..6fd13acd1128 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -109,7 +109,7 @@ add_library(dpnp_backend_library INTERFACE IMPORTED GLOBAL)
 target_include_directories(dpnp_backend_library BEFORE INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/src)
 target_link_libraries(dpnp_backend_library INTERFACE ${_trgt})
 
-if(DPNP_BACKEND_TESTS)
+if (DPNP_BACKEND_TESTS)
   add_subdirectory(tests)
 endif()
 
diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
new file mode 100644
index 000000000000..a32adaa431ff
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -0,0 +1,75 @@
+# *****************************************************************************
+# Copyright (c) 2016-2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+set(python_module_name _lapack_impl)
+pybind11_add_module(${python_module_name} MODULE
+    lapack_py.cpp
+    heevd.cpp
+    syevd.cpp
+)
+
+if (WIN32)
+    if (${CMAKE_VERSION} VERSION_LESS "3.23")
+        # this is a work-around for target_link_options inserting option after -link option, causing
+        # the linker to ignore it.
+        set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel")
+    endif()
+endif()
+
+set_target_properties(${python_module_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
+target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
+
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
+
+if (WIN32)
+  target_compile_options(${python_module_name} PRIVATE
+    /clang:-fno-approx-func
+    /clang:-fno-finite-math-only
+    )
+else()
+  target_compile_options(${python_module_name} PRIVATE
+    -fno-approx-func
+    -fno-finite-math-only
+    )
+endif()
+
+target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel)
+if (UNIX)
+    # this option is supported on Linux only
+    target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code)
+endif()
+
+if (DPNP_GENERATE_COVERAGE)
+    target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping)
+endif()
+
+target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP)
+
+install(TARGETS ${python_module_name}
+  DESTINATION "dpnp/backend/extensions/lapack"
+)
diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
new file mode 100644
index 000000000000..f873ee14d754
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -0,0 +1,210 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+
+#include <pybind11/pybind11.h>
+
+#include "heevd.hpp"
+
+#include "dpnp_utils.hpp"
+
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace lapack
+{
+
+namespace mkl_lapack = oneapi::mkl::lapack;
+namespace py = pybind11;
+
+template <typename T, typename RealT>
+static inline sycl::event call_heevd(sycl::queue exec_q,
+                                     const oneapi::mkl::job jobz,
+                                     const oneapi::mkl::uplo upper_lower,
+                                     const std::int64_t n,
+                                     T* a,
+                                     RealT* w,
+                                     std::vector<sycl::event> &host_task_events,
+                                     const std::vector<sycl::event>& depends)
+{
+    validate_type_for_device<T>(exec_q);
+    validate_type_for_device<RealT>(exec_q);
+
+    const std::int64_t lda = std::max<size_t>(1UL, n);
+    const std::int64_t scratchpad_size = mkl_lapack::heevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
+    T* scratchpad = nullptr;
+
+    std::stringstream error_msg;
+    bool is_exception_caught = false; // any caught exception is an error, whatever value e.info() reports
+
+    sycl::event heevd_event;
+    try
+    {
+        scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
+
+        heevd_event = mkl_lapack::heevd(
+            exec_q,
+            jobz,        // 'jobz == job::vec' means eigenvalues and eigenvectors are computed.
+            upper_lower, // 'upper_lower == uplo::upper' means the upper triangular part of A, or the lower triangular otherwise
+            n,           // The order of the matrix A (0 <= n)
+            a,           // Pointer to A, size (lda, *), where the 2nd dimension, must be at least max(1, n)
+                         // If 'jobz == job::vec', then on exit it will contain the eigenvectors of A
+            lda,         // The leading dimension of a, must be at least max(1, n)
+            w,           // Pointer to array of size at least n, it will contain the eigenvalues of A in ascending order
+            scratchpad,  // Pointer to scratchpad memory to be used by MKL routine for storing intermediate results
+            scratchpad_size,
+            depends);
+    }
+    catch (mkl_lapack::exception const& e)
+    {
+        error_msg << "Unexpected MKL exception caught during heevd() call:\nreason: " << e.what()
+                  << "\ninfo: " << e.info();
+        is_exception_caught = true;
+    }
+    catch (sycl::exception const& e)
+    {
+        error_msg << "Unexpected SYCL exception caught during heevd() call:\n" << e.what();
+        is_exception_caught = true;
+    }
+
+    if (is_exception_caught) // an unexpected error occurs
+    {
+        if (scratchpad != nullptr)
+        {
+            sycl::free(scratchpad, exec_q);
+        }
+        throw std::runtime_error(error_msg.str());
+    }
+
+    sycl::event clean_up_event = exec_q.submit([&](sycl::handler& cgh) {
+        cgh.depends_on(heevd_event); // release the scratchpad only after heevd has completed
+        auto ctx = exec_q.get_context();
+        cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); });
+    });
+    host_task_events.push_back(clean_up_event);
+    return heevd_event;
+}
+
+std::pair<sycl::event, sycl::event> heevd(sycl::queue exec_q,
+                                          const std::int8_t jobz,
+                                          const std::int8_t upper_lower,
+                                          dpctl::tensor::usm_ndarray eig_vecs,
+                                          dpctl::tensor::usm_ndarray eig_vals,
+                                          const std::vector<sycl::event>& depends)
+{
+    const int eig_vecs_nd = eig_vecs.get_ndim();
+    const int eig_vals_nd = eig_vals.get_ndim();
+
+    if (eig_vecs_nd != 2)
+    {
+        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) +
+                              " of an output array with eigenvectors");
+    }
+    else if (eig_vals_nd != 1)
+    {
+        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) +
+                              " of an output array with eigenvalues");
+    }
+
+    const py::ssize_t* eig_vecs_shape = eig_vecs.get_shape_raw();
+    const py::ssize_t* eig_vals_shape = eig_vals.get_shape_raw();
+
+    if (eig_vecs_shape[0] != eig_vecs_shape[1])
+    {
+        throw py::value_error("Output array with eigenvectors must be square");
+    }
+    else if (eig_vecs_shape[0] != eig_vals_shape[0])
+    {
+        throw py::value_error("Eigenvectors and eigenvalues have different shapes");
+    }
+
+    size_t src_nelems(1);
+
+    for (int i = 0; i < eig_vecs_nd; ++i)
+    {
+        src_nelems *= static_cast<size_t>(eig_vecs_shape[i]);
+    }
+
+    if (src_nelems == 0)
+    {
+        // nothing to do
+        return std::make_pair(sycl::event(), sycl::event());
+    }
+
+    // check compatibility of execution queue and allocation queue
+    if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals}))
+    {
+        throw py::value_error("Execution queue is not compatible with allocation queues");
+    }
+
+    // check that arrays do not overlap, and concurrent access is safe.
+    // TODO: need to be exposed by DPCTL headers
+    // auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+    // if (overlap(eig_vecs, eig_vals))
+    // {
+    //     throw py::value_error("Arrays index overlapping segments of memory");
+    // }
+
+    int eig_vecs_typenum = eig_vecs.get_typenum();
+    int eig_vals_typenum = eig_vals.get_typenum();
+    auto const& dpctl_capi = dpctl::detail::dpctl_capi::get();
+
+    sycl::event heevd_ev;
+    std::vector<sycl::event> host_task_events;
+
+    const std::int64_t n = eig_vecs_shape[0];
+    const oneapi::mkl::job jobz_val = static_cast<oneapi::mkl::job>(jobz);
+    const oneapi::mkl::uplo uplo_val = static_cast<oneapi::mkl::uplo>(upper_lower);
+
+    if ((eig_vecs_typenum == dpctl_capi.UAR_CDOUBLE_) && (eig_vals_typenum == dpctl_capi.UAR_DOUBLE_))
+    {
+        std::complex<double>* a = reinterpret_cast<std::complex<double>*>(eig_vecs.get_data());
+        double* w = reinterpret_cast<double*>(eig_vals.get_data());
+
+        heevd_ev = call_heevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
+    }
+    else if ((eig_vecs_typenum == dpctl_capi.UAR_CFLOAT_) && (eig_vals_typenum == dpctl_capi.UAR_FLOAT_))
+    {
+        std::complex<float>* a = reinterpret_cast<std::complex<float>*>(eig_vecs.get_data());
+        float* w = reinterpret_cast<float*>(eig_vals.get_data());
+
+        heevd_ev = call_heevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
+    }
+    else
+    {
+        throw py::value_error("Unexpected types of either eigenvectors or eigenvalues");
+    }
+
+    sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events);
+    return std::make_pair(args_ev, heevd_ev);
+}
+}
+}
+}
+}
diff --git a/dpnp/backend/extensions/lapack/heevd.hpp b/dpnp/backend/extensions/lapack/heevd.hpp
new file mode 100644
index 000000000000..93ce6fe560e1
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/heevd.hpp
@@ -0,0 +1,51 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <CL/sycl.hpp>
+#include <oneapi/mkl.hpp>
+
+#include <dpctl4pybind11.hpp>
+
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace lapack
+{
+    extern std::pair<sycl::event, sycl::event> heevd(sycl::queue exec_q,
+                                                     const std::int8_t jobz,
+                                                     const std::int8_t upper_lower,
+                                                     dpctl::tensor::usm_ndarray eig_vecs,
+                                                     dpctl::tensor::usm_ndarray eig_vals,
+                                                     const std::vector<sycl::event>& depends);
+}
+}
+}
+}
diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp
new file mode 100644
index 000000000000..ea7506308032
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/lapack_py.cpp
@@ -0,0 +1,57 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+// This file defines functions of the dpnp.backend.extensions.lapack._lapack_impl extension
+//
+//*****************************************************************************
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "heevd.hpp"
+#include "syevd.hpp"
+
+namespace py = pybind11;
+
+PYBIND11_MODULE(_lapack_impl, m)
+{
+    m.def("_heevd",
+          &dpnp::backend::ext::lapack::heevd,
+          "Call `heevd` from OneMKL LAPACK library to return "
+          "the eigenvalues and eigenvectors of a complex Hermitian matrix",
+          py::arg("sycl_queue"),
+          py::arg("jobz"), py::arg("upper_lower"),
+          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("depends") = py::list());
+
+    m.def("_syevd",
+          &dpnp::backend::ext::lapack::syevd,
+          "Call `syevd` from OneMKL LAPACK library to return "
+          "the eigenvalues and eigenvectors of a real symmetric matrix",
+          py::arg("sycl_queue"),
+          py::arg("jobz"), py::arg("upper_lower"),
+          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("depends") = py::list());
+}
diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp
new file mode 100644
index 000000000000..93be82d201d8
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/syevd.cpp
@@ -0,0 +1,214 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+
+#include <pybind11/pybind11.h>
+
+#include "syevd.hpp"
+
+#include "dpnp_utils.hpp"
+
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace lapack
+{
+
+namespace mkl_lapack = oneapi::mkl::lapack;
+namespace py = pybind11;
+
+template <typename T>
+static inline sycl::event call_syevd(sycl::queue exec_q,
+                                     const oneapi::mkl::job jobz,
+                                     const oneapi::mkl::uplo upper_lower,
+                                     const std::int64_t n,
+                                     T* a,
+                                     T* w,
+                                     std::vector<sycl::event> &host_task_events,
+                                     const std::vector<sycl::event>& depends)
+{
+    validate_type_for_device<T>(exec_q);
+
+    const std::int64_t lda = std::max<size_t>(1UL, n);
+    const std::int64_t scratchpad_size = mkl_lapack::syevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
+    T* scratchpad = nullptr;
+
+    std::stringstream error_msg;
+    bool is_exception_caught = false; // any caught exception is an error, whatever value e.info() reports
+
+    sycl::event syevd_event;
+    try
+    {
+        scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
+
+        syevd_event = mkl_lapack::syevd(
+            exec_q,
+            jobz,        // 'jobz == job::vec' means eigenvalues and eigenvectors are computed.
+            upper_lower, // 'upper_lower == uplo::upper' means the upper triangular part of A, or the lower triangular otherwise
+            n,           // The order of the matrix A (0 <= n)
+            a,           // Pointer to A, size (lda, *), where the 2nd dimension, must be at least max(1, n)
+                         // If 'jobz == job::vec', then on exit it will contain the eigenvectors of A
+            lda,         // The leading dimension of a, must be at least max(1, n)
+            w,           // Pointer to array of size at least n, it will contain the eigenvalues of A in ascending order
+            scratchpad,  // Pointer to scratchpad memory to be used by MKL routine for storing intermediate results
+            scratchpad_size,
+            depends);
+    }
+    catch (mkl_lapack::exception const& e)
+    {
+        error_msg << "Unexpected MKL exception caught during syevd() call:\nreason: " << e.what()
+                  << "\ninfo: " << e.info();
+        is_exception_caught = true;
+    }
+    catch (sycl::exception const& e)
+    {
+        error_msg << "Unexpected SYCL exception caught during syevd() call:\n" << e.what();
+        is_exception_caught = true;
+    }
+
+    if (is_exception_caught) // an unexpected error occurs
+    {
+        if (scratchpad != nullptr)
+        {
+            sycl::free(scratchpad, exec_q);
+        }
+        throw std::runtime_error(error_msg.str());
+    }
+
+    sycl::event clean_up_event = exec_q.submit([&](sycl::handler& cgh) {
+        cgh.depends_on(syevd_event); // release the scratchpad only after syevd has completed
+        auto ctx = exec_q.get_context();
+        cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); });
+    });
+    host_task_events.push_back(clean_up_event);
+    return syevd_event;
+}
+
+std::pair<sycl::event, sycl::event> syevd(sycl::queue exec_q,
+                                          const std::int8_t jobz,
+                                          const std::int8_t upper_lower,
+                                          dpctl::tensor::usm_ndarray eig_vecs,
+                                          dpctl::tensor::usm_ndarray eig_vals,
+                                          const std::vector<sycl::event>& depends)
+{
+    const int eig_vecs_nd = eig_vecs.get_ndim();
+    const int eig_vals_nd = eig_vals.get_ndim();
+
+    if (eig_vecs_nd != 2)
+    {
+        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) +
+                              " of an output array with eigenvectors");
+    }
+    else if (eig_vals_nd != 1)
+    {
+        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) +
+                              " of an output array with eigenvalues");
+    }
+
+    const py::ssize_t* eig_vecs_shape = eig_vecs.get_shape_raw();
+    const py::ssize_t* eig_vals_shape = eig_vals.get_shape_raw();
+
+    if (eig_vecs_shape[0] != eig_vecs_shape[1])
+    {
+        throw py::value_error("Output array with eigenvectors must be square");
+    }
+    else if (eig_vecs_shape[0] != eig_vals_shape[0])
+    {
+        throw py::value_error("Eigenvectors and eigenvalues have different shapes");
+    }
+
+    size_t src_nelems(1);
+
+    for (int i = 0; i < eig_vecs_nd; ++i)
+    {
+        src_nelems *= static_cast<size_t>(eig_vecs_shape[i]);
+    }
+
+    if (src_nelems == 0)
+    {
+        // nothing to do
+        return std::make_pair(sycl::event(), sycl::event());
+    }
+
+    // check compatibility of execution queue and allocation queue
+    if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals}))
+    {
+        throw py::value_error("Execution queue is not compatible with allocation queues");
+    }
+
+    // check that arrays do not overlap, and concurrent access is safe.
+    // TODO: need to be exposed by DPCTL headers
+    // auto const& overlap = dpctl::tensor::overlap::MemoryOverlap();
+    // if (overlap(eig_vecs, eig_vals))
+    // {
+    //     throw py::value_error("Arrays index overlapping segments of memory");
+    // }
+
+    int eig_vecs_typenum = eig_vecs.get_typenum();
+    int eig_vals_typenum = eig_vals.get_typenum();
+    auto const& dpctl_capi = dpctl::detail::dpctl_capi::get();
+
+    sycl::event syevd_ev;
+    std::vector<sycl::event> host_task_events;
+
+    const std::int64_t n = eig_vecs_shape[0];
+    const oneapi::mkl::job jobz_val = static_cast<oneapi::mkl::job>(jobz);
+    const oneapi::mkl::uplo uplo_val = static_cast<oneapi::mkl::uplo>(upper_lower);
+
+    if (eig_vecs_typenum != eig_vals_typenum)
+    {
+        throw py::value_error("Types of eigenvectors and eigenvalues are mismatched");
+    }
+    else if (eig_vecs_typenum == dpctl_capi.UAR_DOUBLE_)
+    {
+        double* a = reinterpret_cast<double*>(eig_vecs.get_data());
+        double* w = reinterpret_cast<double*>(eig_vals.get_data());
+
+        syevd_ev = call_syevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
+    }
+    else if (eig_vecs_typenum == dpctl_capi.UAR_FLOAT_)
+    {
+        float* a = reinterpret_cast<float*>(eig_vecs.get_data());
+        float* w = reinterpret_cast<float*>(eig_vals.get_data());
+
+        syevd_ev = call_syevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
+    }
+    else
+    {
+        throw py::value_error("Unexpected types with num=" + std::to_string(eig_vecs_typenum) +
+                              " for eigenvectors and eigenvalues");
+    }
+
+    sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events);
+    return std::make_pair(args_ev, syevd_ev);
+}
+}
+}
+}
+}
diff --git a/dpnp/backend/extensions/lapack/syevd.hpp b/dpnp/backend/extensions/lapack/syevd.hpp
new file mode 100644
index 000000000000..14d167ec02a7
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/syevd.hpp
@@ -0,0 +1,51 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <CL/sycl.hpp>
+#include <oneapi/mkl.hpp>
+
+#include <dpctl4pybind11.hpp>
+
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace lapack
+{
+    extern std::pair<sycl::event, sycl::event> syevd(sycl::queue exec_q,
+                                                     const std::int8_t jobz,
+                                                     const std::int8_t upper_lower,
+                                                     dpctl::tensor::usm_ndarray eig_vecs,
+                                                     dpctl::tensor::usm_ndarray eig_vals,
+                                                     const std::vector<sycl::event>& depends = {});
+}
+}
+}
+}
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 6a5bcf239df2..f6aabbb1399d 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -67,7 +67,9 @@
     "from_dlpack",
     "get_dpnp_descriptor",
     "get_include",
-    "get_normalized_queue_device"
+    "get_normalized_queue_device",
+    "get_usm_ndarray",
+    "is_supported_array_type"
 ]
 
 from dpnp import (
@@ -371,3 +373,44 @@ def get_normalized_queue_device(obj=None,
     if hasattr(dpt._device, 'normalize_queue_device'):
         return dpt._device.normalize_queue_device(sycl_queue=sycl_queue, device=device)
     return sycl_queue
+
+
+def get_usm_ndarray(a):
+    """
+    Return :class:`dpctl.tensor.usm_ndarray` from input array `a`.
+
+    Parameters
+    ----------
+    a : {dpnp_array, usm_ndarray}
+        Input array of supported type :class:`dpnp.ndarray`
+        or :class:`dpctl.tensor.usm_ndarray`.
+
+    Returns
+    -------
+    out : usm_ndarray
+        A dpctl USM ndarray of input array `a`.
+
+    """
+
+    return a.get_array() if isinstance(a, dpnp_array) else a
+
+
+def is_supported_array_type(a):
+    """
+    Return ``True`` if an array of either type :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray` type, ``False`` otherwise.
+
+    Parameters
+    ----------
+    a : array
+        An input array to check the type.
+
+    Returns
+    -------
+    out : bool
+        ``True`` if type of array `a` is supported array type,
+        ``False`` otherwise.
+
+    """
+
+    return isinstance(a, (dpnp_array, dpt.usm_ndarray))
diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py
index 6e6f55db8f92..e2e962585786 100644
--- a/dpnp/linalg/dpnp_iface_linalg.py
+++ b/dpnp/linalg/dpnp_iface_linalg.py
@@ -41,6 +41,10 @@
 
 
 import dpnp
+from .dpnp_utils_linalg import (
+    dpnp_eigh
+)
+
 import numpy
 
 from dpnp.dpnp_utils import *
@@ -53,6 +57,7 @@
     "cond",
     "det",
     "eig",
+    "eigh",
     "eigvals",
     "inv",
     "matrix_power",
@@ -172,6 +177,68 @@ def eig(x1):
     return call_origin(numpy.linalg.eig, x1)
 
 
+def eigh(a, UPLO='L'):
+    """
+    Return the eigenvalues and eigenvectors of a complex Hermitian
+    (conjugate symmetric) or a real symmetric matrix.
+
+    Returns two objects, a 1-D array containing the eigenvalues of `a`, and
+    a 2-D square array or matrix (depending on the input type) of the
+    corresponding eigenvectors (in columns).
+
+    For full documentation refer to :obj:`numpy.linalg.eigh`.
+
+    Returns
+    -------
+    w : (..., M) dpnp.ndarray
+        The eigenvalues in ascending order, each repeated according to
+        its multiplicity.
+    v : (..., M, M) dpnp.ndarray
+        The column ``v[:, i]`` is the normalized eigenvector corresponding
+        to the eigenvalue ``w[i]``.
+
+    Limitations
+    -----------
+    Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
+    Input array data types are limited by supported DPNP :ref:`Data types`.
+
+    See Also
+    --------
+    :obj:`dpnp.eig` : eigenvalues and right eigenvectors for non-symmetric arrays.
+    :obj:`dpnp.eigvals` : eigenvalues of non-symmetric arrays.
+
+    Examples
+    --------
+    >>> import dpnp as dp
+    >>> a = dp.array([[1, -2j], [2j, 5]])
+    >>> a
+    array([[ 1.+0.j, -0.-2.j],
+           [ 0.+2.j,  5.+0.j]])
+    >>> w, v = dp.linalg.eigh(a)
+    >>> w; v
+    array([0.17157288, 5.82842712])
+    array([[-0.92387953-0.j        , -0.38268343+0.j        ], # may vary
+           [ 0.        +0.38268343j,  0.        -0.92387953j]])
+    
+    """
+
+    if UPLO not in ('L', 'U'):
+        raise ValueError("UPLO argument must be 'L' or 'U'")
+
+    if not dpnp.is_supported_array_type(a):
+        raise TypeError("An array must be any of supported type, but got {}".format(type(a)))
+
+    if a.ndim < 2:
+        raise ValueError("%d-dimensional array given. Array must be "
+                         "at least two-dimensional" % a.ndim)
+
+    m, n = a.shape[-2:]
+    if m != n:
+        raise ValueError("Last 2 dimensions of the array must be square")
+
+    return dpnp_eigh(a, UPLO=UPLO)
+
+
 def eigvals(input):
     """
     Compute the eigenvalues of a general matrix.
diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
new file mode 100644
index 000000000000..32a9ac7d5607
--- /dev/null
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -0,0 +1,134 @@
+# cython: language_level=3
+# distutils: language = c++
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+import dpnp
+import dpnp.backend.extensions.lapack._lapack_impl as li
+
+import dpctl.tensor._tensor_impl as ti
+
+__all__ = [
+    "dpnp_eigh"
+]
+
+_jobz = {'N': 0, 'V': 1}
+_upper_lower = {'U': 0, 'L': 1}
+
+
+def dpnp_eigh(a, UPLO):
+    """
+    Return the eigenvalues and eigenvectors of a complex Hermitian
+    (conjugate symmetric) or a real symmetric matrix.
+
+    The main calculation is done by calling an extension function
+    for LAPACK library of OneMKL. Depending on input type of `a` array,
+    it will be either ``heevd`` (for complex types) or ``syevd`` (for others).
+
+    """
+
+    a_usm_type = a.usm_type
+    a_sycl_queue = a.sycl_queue
+    a_order = 'C' if a.flags.c_contiguous else 'F'
+    a_usm_arr = dpnp.get_usm_ndarray(a)
+
+    # 'V' means both eigenvectors and eigenvalues will be calculated
+    jobz = _jobz['V']
+    uplo = _upper_lower[UPLO]
+
+    # get resulting type of arrays with eigenvalues and eigenvectors
+    a_dtype = a.dtype
+    lapack_func = "_syevd"
+    if dpnp.issubdtype(a_dtype, dpnp.complexfloating):
+        lapack_func = "_heevd"
+        v_type = a_dtype
+        w_type = dpnp.float64 if a_dtype == dpnp.complex128 else dpnp.float32
+    elif dpnp.issubdtype(a_dtype, dpnp.floating):
+        v_type = w_type = a_dtype
+    elif a_sycl_queue.sycl_device.has_aspect_fp64:
+        v_type = w_type = dpnp.float64
+    else:
+        v_type = w_type = dpnp.float32
+
+    if a.ndim > 2:
+        w = dpnp.empty(a.shape[:-1], dtype=w_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue)
+
+        # need to loop over the 1st dimension to get eigenvalues and eigenvectors of 3d matrix A
+        op_count = a.shape[0]
+        if op_count == 0:
+            return w, dpnp.empty_like(a, dtype=v_type)
+
+        eig_vecs = [None] * op_count
+        ht_copy_ev = [None] * op_count
+        ht_lapack_ev = [None] * op_count
+        for i in range(op_count):
+            # oneMKL LAPACK assumes fortran-like array as input, so
+            # allocate a memory with 'F' order for dpnp array of eigenvectors
+            eig_vecs[i] = dpnp.empty_like(a[i], order='F', dtype=v_type)
+
+            # use DPCTL tensor function to fill the array of eigenvectors with content of input array
+            ht_copy_ev[i], copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(src=a_usm_arr[i], dst=eig_vecs[i].get_array(), sycl_queue=a_sycl_queue)
+
+            # call LAPACK extension function to get eigenvalues and eigenvectors of a portion of matrix A
+            ht_lapack_ev[i], _ = getattr(li, lapack_func)(a_sycl_queue, jobz, uplo, eig_vecs[i].get_array(), w[i].get_array(), depends=[copy_ev])
+
+            # TODO: remove once dpctl fix is available
+            ht_lapack_ev[i].wait()
+
+        for i in range(op_count):
+            # ht_lapack_ev[i].wait()
+            ht_copy_ev[i].wait()
+
+        # combine the list of eigenvectors into a single array
+        v = dpnp.array(eig_vecs, order=a_order)
+        return w, v
+    else:
+        # oneMKL LAPACK assumes fortran-like array as input, so
+        # allocate a memory with 'F' order for dpnp array of eigenvectors
+        v = dpnp.empty_like(a, order='F', dtype=v_type)
+
+        # use DPCTL tensor function to fill the array of eigenvectors with content of input array
+        ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(src=a_usm_arr, dst=v.get_array(), sycl_queue=a_sycl_queue)
+
+        # allocate a memory for dpnp array of eigenvalues
+        w = dpnp.empty(a.shape[:-1], dtype=w_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue)
+
+        # call LAPACK extension function to get eigenvalues and eigenvectors of matrix A
+        ht_lapack_ev, lapack_ev = getattr(li, lapack_func)(a_sycl_queue, jobz, uplo, v.get_array(), w.get_array(), depends=[copy_ev])
+
+        if a_order != 'F':
+            # need to align order of eigenvectors with one of input matrix A
+            out_v = dpnp.empty_like(v, order=a_order)
+            ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=v.get_array(), dst=out_v.get_array(), sycl_queue=a_sycl_queue, depends=[lapack_ev])
+            ht_copy_out_ev.wait()
+        else:
+            out_v = v
+
+        ht_lapack_ev.wait()
+        ht_copy_ev.wait()
+
+        return w, out_v
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index fee79df860fb..36b17d5edbc3 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -405,6 +405,8 @@ tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction:
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_multiple_axes
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_multiple_axes_keepdims
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan
+tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan_imag
+tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan_real
 
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestCubReduction_param_0_{order='C', shape=(10,)}::test_cub_max
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestCubReduction_param_0_{order='C', shape=(10,)}::test_cub_min
@@ -645,11 +647,14 @@ tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_1D_choicelist
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_choicelist_condlist_broadcast
+tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_complex
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default
+tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default_complex
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default_scalar
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_empty_lists
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_length_error
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_broadcastable
+tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_broadcastable_complex
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_non_broadcastable
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_type_error_condlist
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_diagonal
@@ -992,9 +997,14 @@ tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_5_{de
 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_5_{decimals=99}::test_round_small
 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_6_{decimals=100}::test_round_large
 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_6_{decimals=100}::test_round_small
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_around
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_ceil
 tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_fix
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_floor
 tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint
 tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint_negative
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_round_
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_trunc
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_transposed
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_transposed2
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes
@@ -1430,6 +1440,8 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_b
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_array_bins
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_bins_not_ordered
+tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights
+tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_density
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_empty
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights
diff --git a/tests/test_linalg.py b/tests/test_linalg.py
index d9784a41558f..d90ac8bf9c6c 100644
--- a/tests/test_linalg.py
+++ b/tests/test_linalg.py
@@ -1,5 +1,8 @@
 import pytest
-from .helper import get_all_dtypes
+from .helper import (
+    get_all_dtypes,
+    get_complex_dtypes
+)
 
 import dpnp as inp
 
@@ -33,12 +36,12 @@ def vvsort(val, vec, size, xp):
         unravel_imax = numpy.unravel_index(imax, val.shape)
 
         # swap elements in val array
-        temp = xp.array(val[unravel_i], dtype=vec.dtype, **val_kwargs)
+        temp = xp.array(val[unravel_i], dtype=val.dtype, **val_kwargs)
         val[unravel_i] = val[unravel_imax]
         val[unravel_imax] = temp
 
         # swap corresponding columns in vec matrix
-        temp = xp.array(vec[:, i], dtype=val.dtype, **vec_kwargs)
+        temp = xp.array(vec[:, i], dtype=vec.dtype, **vec_kwargs)
         vec[:, i] = vec[:, imax]
         vec[:, imax] = temp
 
@@ -126,11 +129,49 @@ def test_eig_arange(type, size):
     assert (dpnp_vec.dtype == np_vec.dtype)
     assert (dpnp_val.shape == np_val.shape)
     assert (dpnp_vec.shape == np_vec.shape)
+    assert (dpnp_val.usm_type == dpnp_symm.usm_type)
+    assert (dpnp_vec.usm_type == dpnp_symm.usm_type)
 
     assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05)
     assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_none=True))
+@pytest.mark.parametrize("size", [2, 4, 8])
+def test_eigh_arange(type, size):
+    a = numpy.arange(size * size, dtype=type).reshape((size, size))
+    symm_orig = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype=type))
+    symm = symm_orig
+    dpnp_symm_orig = inp.array(symm)
+    dpnp_symm = dpnp_symm_orig
+
+    dpnp_val, dpnp_vec = inp.linalg.eigh(dpnp_symm)
+    np_val, np_vec = numpy.linalg.eigh(symm)
+
+    # DPNP sort val/vec by abs value
+    vvsort(dpnp_val, dpnp_vec, size, inp)
+
+    # NP sort val/vec by abs value
+    vvsort(np_val, np_vec, size, numpy)
+
+    # NP change sign of vectors
+    for i in range(np_vec.shape[1]):
+        if (np_vec[0, i] * dpnp_vec[0, i]).asnumpy() < 0:
+            np_vec[:, i] = -np_vec[:, i]
+
+    assert_array_equal(symm_orig, symm)
+    assert_array_equal(dpnp_symm_orig, dpnp_symm)
+
+    assert (dpnp_val.shape == np_val.shape)
+    assert (dpnp_vec.shape == np_vec.shape)
+    assert (dpnp_val.usm_type == dpnp_symm.usm_type)
+    assert (dpnp_vec.usm_type == dpnp_symm.usm_type)
+
+    assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-04)
+    assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-04)
+
+
 @pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 def test_eigvals(type):
     if dpctl.get_current_device_type() != dpctl.device_type.gpu:
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index fcea0d82eb86..e5e53646e1a1 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -769,6 +769,38 @@ def test_eig(device):
     assert_sycl_queue_equal(dpnp_vec_queue, expected_queue)
 
 
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
+@pytest.mark.parametrize("device",
+                          valid_devices,
+                          ids=[device.filter_string for device in valid_devices])
+def test_eigh(device):
+    size = 4
+    a = numpy.arange(size * size, dtype=numpy.float64).reshape((size, size))
+    symm_orig = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype=numpy.float64))
+    numpy_data = symm_orig
+    dpnp_symm_orig = dpnp.array(numpy_data, device=device)
+    dpnp_data = dpnp_symm_orig
+
+    dpnp_val, dpnp_vec = dpnp.linalg.eigh(dpnp_data)
+    numpy_val, numpy_vec = numpy.linalg.eigh(numpy_data)
+
+    assert_allclose(dpnp_val, numpy_val, rtol=1e-05, atol=1e-05)
+    assert_allclose(dpnp_vec, numpy_vec, rtol=1e-05, atol=1e-05)
+
+    assert (dpnp_val.dtype == numpy_val.dtype)
+    assert (dpnp_vec.dtype == numpy_vec.dtype)
+    assert (dpnp_val.shape == numpy_val.shape)
+    assert (dpnp_vec.shape == numpy_vec.shape)
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    dpnp_val_queue = dpnp_val.get_array().sycl_queue
+    dpnp_vec_queue = dpnp_vec.get_array().sycl_queue
+
+    # compare queue and device
+    assert_sycl_queue_equal(dpnp_val_queue, expected_queue)
+    assert_sycl_queue_equal(dpnp_vec_queue, expected_queue)
+
+
 @pytest.mark.parametrize("device",
                           valid_devices,
                           ids=[device.filter_string for device in valid_devices])
diff --git a/tests/third_party/cupy/creation_tests/test_ranges.py b/tests/third_party/cupy/creation_tests/test_ranges.py
index ac94297354f0..11e1d7f96048 100644
--- a/tests/third_party/cupy/creation_tests/test_ranges.py
+++ b/tests/third_party/cupy/creation_tests/test_ranges.py
@@ -192,7 +192,7 @@ def test_linspace_array_start_stop_axis1(self, xp, dtype_range, dtype_out):
 
     @testing.with_requires('numpy>=1.16')
     @testing.for_complex_dtypes()
-    @testing.numpy_cupy_array_equal()
+    @testing.numpy_cupy_allclose()
     def test_linspace_complex_start_stop(self, xp, dtype):
         start = xp.array([0, 120], dtype=dtype)
         stop = xp.array([100, 0], dtype=dtype)
diff --git a/tests/third_party/cupy/linalg_tests/test_eigenvalue.py b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py
new file mode 100644
index 000000000000..fe577e32b285
--- /dev/null
+++ b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py
@@ -0,0 +1,199 @@
+import unittest
+
+import numpy
+import pytest
+
+import dpnp as cupy
+from tests.third_party.cupy import testing
+
+
+def _get_hermitian(xp, a, UPLO):
+    # TODO: fix this, currently dpnp.transpose() doesn't support complex types
+    # and no dpnp_array.swapaxes()
+    a = _wrap_as_numpy_array(xp, a)
+    _xp = numpy
+
+    if UPLO == 'U':
+        _a = _xp.triu(a) + _xp.triu(a, k=1).swapaxes(-2, -1).conj()
+    else:
+        _a = _xp.tril(a) + _xp.tril(a, k=-1).swapaxes(-2, -1).conj()
+    return xp.array(_a)
+
+# TODO: remove once all required functionality is supported
+def _wrap_as_numpy_array(xp, a):
+    return a.asnumpy() if xp is cupy else a
+
+@testing.parameterize(*testing.product({
+    'UPLO': ['U', 'L'],
+}))
+class TestEigenvalue(unittest.TestCase):
+
+    @testing.for_all_dtypes()
+    @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4, contiguous_check=False)
+    def test_eigh(self, xp, dtype):
+        if xp == numpy and dtype == numpy.float16:
+            # NumPy's eigh does not support float16
+            _dtype = 'f'
+        else:
+            _dtype = dtype
+        if numpy.dtype(_dtype).kind == 'c':
+            a = xp.array([[1, 2j, 3], [4j, 5, 6j], [7, 8j, 9]], _dtype)
+        else:
+            a = xp.array([[1, 0, 3], [0, 5, 0], [7, 0, 9]], _dtype)
+        w, v = xp.linalg.eigh(a, UPLO=self.UPLO)
+
+        # Changed the verification method to check if Av and vw match, since
+        # the eigenvectors of eigh() with CUDA 11.6 are mathematically correct
+        # but may not match NumPy.
+        A = _get_hermitian(xp, a, self.UPLO)
+        if _dtype == numpy.float16:
+            tol = 1e-3
+        else:
+            tol = 1e-5
+
+        # TODO: remove _wrap_as_numpy_array() once @ supports complex types
+        testing.assert_allclose(_wrap_as_numpy_array(xp, A) @ _wrap_as_numpy_array(xp, v),
+                                _wrap_as_numpy_array(xp, v) @ numpy.diag(_wrap_as_numpy_array(xp, w)),
+                                atol=tol, rtol=tol)
+
+        # Check if v @ vt is an identity matrix
+        testing.assert_allclose(_wrap_as_numpy_array(xp, v) @ _wrap_as_numpy_array(xp, v).swapaxes(-2, -1).conj(),
+                                numpy.identity(_wrap_as_numpy_array(xp, A).shape[-1], _dtype), atol=tol,
+                                rtol=tol)
+        if xp == numpy and dtype == numpy.float16:
+            w = w.astype('e')
+        return w
+
+    @testing.for_all_dtypes(no_bool=True, no_float16=True, no_complex=True)
+    @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4, contiguous_check=False)
+    def test_eigh_batched(self, xp, dtype):
+        a = xp.array([[[1, 0, 3], [0, 5, 0], [7, 0, 9]],
+                      [[3, 0, 3], [0, 7, 0], [7, 0, 11]]], dtype)
+        w, v = xp.linalg.eigh(a, UPLO=self.UPLO)
+
+        # NumPy, cuSOLVER, rocSOLVER all sort in ascending order,
+        # so w's should be directly comparable. However, both cuSOLVER
+        # and rocSOLVER pick a different convention for constructing
+        # eigenvectors, so v's are not directly comparable and we verify
+        # them through the eigen equation A*v=w*v.
+        A = _get_hermitian(xp, a, self.UPLO)
+        for i in range(a.shape[0]):
+            testing.assert_allclose(
+                A[i].dot(v[i]), w[i]*v[i], rtol=1e-5, atol=1e-5)
+        return w
+
+    @testing.for_complex_dtypes()
+    @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4, contiguous_check=False)
+    def test_eigh_complex_batched(self, xp, dtype):
+        print()
+        a = xp.array([[[1, 2j, 3], [4j, 5, 6j], [7, 8j, 9]],
+                      [[0, 2j, 3], [4j, 4, 6j], [7, 8j, 8]]], dtype)
+        w, v = xp.linalg.eigh(a, UPLO=self.UPLO)
+
+        # NumPy, cuSOLVER, rocSOLVER all sort in ascending order,
+        # so w's should be directly comparable. However, both cuSOLVER
+        # and rocSOLVER pick a different convention for constructing
+        # eigenvectors, so v's are not directly comparable and we verify
+        # them through the eigen equation A*v=w*v.
+        A = _get_hermitian(xp, a, self.UPLO)
+
+        # TODO: remove _wrap_as_numpy_array() once dpnp.dot() supports complex types
+        A = _wrap_as_numpy_array(xp, A)
+        v = _wrap_as_numpy_array(xp, v)
+        w = _wrap_as_numpy_array(xp, w)
+
+        for i in range(a.shape[0]):
+            testing.assert_allclose(
+                A[i].dot(v[i]), w[i]*v[i], rtol=1e-5, atol=1e-5)
+        return w
+
+    @pytest.mark.skip("No support of dpnp.eigvalsh()")
+    @testing.for_all_dtypes(no_float16=True, no_complex=True)
+    @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4)
+    def test_eigvalsh(self, xp, dtype):
+        a = xp.array([[1, 0, 3], [0, 5, 0], [7, 0, 9]], dtype)
+        w = xp.linalg.eigvalsh(a, UPLO=self.UPLO)
+        # NumPy, cuSOLVER, rocSOLVER all sort in ascending order,
+        # so they should be directly comparable
+        return w
+
+    @pytest.mark.skip("No support of dpnp.eigvalsh()")
+    @testing.for_all_dtypes(no_float16=True, no_complex=True)
+    @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4)
+    def test_eigvalsh_batched(self, xp, dtype):
+        a = xp.array([[[1, 0, 3], [0, 5, 0], [7, 0, 9]],
+                      [[3, 0, 3], [0, 7, 0], [7, 0, 11]]], dtype)
+        w = xp.linalg.eigvalsh(a, UPLO=self.UPLO)
+        # NumPy, cuSOLVER, rocSOLVER all sort in ascending order,
+        # so they should be directly comparable
+        return w
+
+    @pytest.mark.skip("No support of dpnp.eigvalsh()")
+    @testing.for_complex_dtypes()
+    @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4)
+    def test_eigvalsh_complex(self, xp, dtype):
+        a = xp.array([[1, 2j, 3], [4j, 5, 6j], [7, 8j, 9]], dtype)
+        w = xp.linalg.eigvalsh(a, UPLO=self.UPLO)
+        # NumPy, cuSOLVER, rocSOLVER all sort in ascending order,
+        # so they should be directly comparable
+        return w
+
+    @pytest.mark.skip("No support of dpnp.eigvalsh()")
+    @testing.for_complex_dtypes()
+    @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4)
+    def test_eigvalsh_complex_batched(self, xp, dtype):
+        a = xp.array([[[1, 2j, 3], [4j, 5, 6j], [7, 8j, 9]],
+                      [[0, 2j, 3], [4j, 4, 6j], [7, 8j, 8]]], dtype)
+        w = xp.linalg.eigvalsh(a, UPLO=self.UPLO)
+        # NumPy, cuSOLVER, rocSOLVER all sort in ascending order,
+        # so they should be directly comparable
+        return w
+
+
+@testing.parameterize(*testing.product({
+    'UPLO': ['U', 'L'],
+    'shape': [(0, 0),
+              (2, 0, 0),
+              (0, 3, 3)]
+}))
+class TestEigenvalueEmpty(unittest.TestCase):
+
+    @testing.for_dtypes('ifdFD')
+    @testing.numpy_cupy_allclose()
+    def test_eigh(self, xp, dtype):
+        a = xp.empty(self.shape, dtype=dtype)
+        assert a.size == 0
+        return xp.linalg.eigh(a, UPLO=self.UPLO)
+
+    @pytest.mark.skip("No support of dpnp.eigvalsh()")
+    @testing.for_dtypes('ifdFD')
+    @testing.numpy_cupy_allclose()
+    def test_eigvalsh(self, xp, dtype):
+        a = xp.empty(self.shape, dtype)
+        assert a.size == 0
+        return xp.linalg.eigvalsh(a, UPLO=self.UPLO)
+
+
+@testing.parameterize(*testing.product({
+    'UPLO': ['U', 'L'],
+    'shape': [(),
+              (3,),
+              (2, 3),
+              (4, 0),
+              (2, 2, 3),
+              (0, 2, 3)]
+}))
+class TestEigenvalueInvalid(unittest.TestCase):
+
+    def test_eigh_shape_error(self):
+        for xp in (numpy, cupy):
+            a = xp.zeros(self.shape)
+            with pytest.raises((numpy.linalg.LinAlgError, ValueError)):
+                xp.linalg.eigh(a, self.UPLO)
+
+    @pytest.mark.skip("No support of dpnp.eigvalsh()")
+    def test_eigvalsh_shape_error(self):
+        for xp in (numpy, cupy):
+            a = xp.zeros(self.shape)
+            with pytest.raises((numpy.linalg.LinAlgError, ValueError)):
+                xp.linalg.eigvalsh(a, self.UPLO)
diff --git a/tests/third_party/cupy/testing/helper.py b/tests/third_party/cupy/testing/helper.py
index 6331309820d2..af8f6e545b29 100644
--- a/tests/third_party/cupy/testing/helper.py
+++ b/tests/third_party/cupy/testing/helper.py
@@ -200,6 +200,16 @@ def _contains_signed_and_unsigned(kw):
         any(d in vs for d in _float_dtypes + _signed_dtypes)
 
 
+def _wraps_partial(wrapped, *names):
+    # Only the `wrapped` function has the arguments listed in `names`.
+    def decorator(impl):
+        impl = functools.wraps(wrapped)(impl)
+        impl.__signature__ = inspect.signature(
+            functools.partial(wrapped, **{name: None for name in names}))
+        return impl
+    return decorator
+
+
 def _make_decorator(check_func, name, type_check, accept_error, sp_name=None,
                     scipy_name=None):
     assert isinstance(name, str)
@@ -640,16 +650,16 @@ def for_dtypes(dtypes, name='dtype'):
     argument.
     """
     def decorator(impl):
-        @functools.wraps(impl)
-        def test_func(self, *args, **kw):
+        @_wraps_partial(impl, name)
+        def test_func(*args, **kw):
             for dtype in dtypes:
                 try:
                     kw[name] = numpy.dtype(dtype).type
-                    impl(self, *args, **kw)
+                    impl(*args, **kw)
                 except unittest.SkipTest as e:
-                    pass  # print(f"Function decorator(): skipped: name={name} dtype={dtype} error={e}")
+                    print('skipped: {} = {} ({})'.format(name, dtype, e))
                 except Exception:
-                    # print(f"Function decorator(): name={name} dtype={dtype}")
+                    print(name, 'is', dtype)
                     raise
 
         return test_func
@@ -661,8 +671,14 @@ def _get_supported_float_dtypes():
     else:
         return (numpy.float32,)
 
+def _get_supported_complex_dtypes():
+    if select_default_device().has_aspect_fp64:
+        return (numpy.complex128, numpy.complex64)
+    else:
+        return (numpy.complex64,)
+
 
-_complex_dtypes = ()
+_complex_dtypes = _get_supported_complex_dtypes()
 _regular_float_dtypes = _get_supported_float_dtypes()
 _float_dtypes = _regular_float_dtypes
 _signed_dtypes = ()

From 5859aec1ca41a06cf39a95af36e562d7731c7406 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Thu, 20 Apr 2023 15:15:58 -0500
Subject: [PATCH 093/129] Applying the review comments

---
 .gitignore                               |  1 -
 dpnp/backend/extensions/lapack/heevd.cpp | 27 +++++++++++++++++-------
 dpnp/backend/extensions/lapack/syevd.cpp | 27 +++++++++++++++++-------
 dpnp/linalg/dpnp_utils_linalg.py         |  5 +----
 4 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/.gitignore b/.gitignore
index ea9f2cba333d..8beb38f1efd6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
 # CMake build and local install directory
 _skbuild
-build
 build_cython
 dpnp.egg-info
 
diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
index f873ee14d754..8c943646ff0a 100644
--- a/dpnp/backend/extensions/lapack/heevd.cpp
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -44,14 +44,14 @@ namespace mkl_lapack = oneapi::mkl::lapack;
 namespace py = pybind11;
 
 template <typename T, typename RealT>
-static inline sycl::event call_heevd(sycl::queue exec_q,
-                                     const oneapi::mkl::job jobz,
-                                     const oneapi::mkl::uplo upper_lower,
-                                     const std::int64_t n,
-                                     T* a,
-                                     RealT* w,
-                                     std::vector<sycl::event> &host_task_events,
-                                     const std::vector<sycl::event>& depends)
+static sycl::event call_heevd(sycl::queue exec_q,
+                              const oneapi::mkl::job jobz,
+                              const oneapi::mkl::uplo upper_lower,
+                              const std::int64_t n,
+                              T* a,
+                              RealT* w,
+                              std::vector<sycl::event>& host_task_events,
+                              const std::vector<sycl::event>& depends)
 {
     validate_type_for_device<T>(exec_q);
     validate_type_for_device<RealT>(exec_q);
@@ -171,6 +171,17 @@ std::pair<sycl::event, sycl::event> heevd(sycl::queue exec_q,
     //     throw py::value_error("Arrays index overlapping segments of memory");
     // }
 
+    bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
+    bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
+    if (!is_eig_vecs_f_contig)
+    {
+        throw py::value_error("An array with input matrix / ouput eigenvectors must be F-contiguous");
+    }
+    else if (!is_eig_vals_c_contig)
+    {
+        throw py::value_error("An array with output eigenvalues must be C-contiguous");
+    }
+
     int eig_vecs_typenum = eig_vecs.get_typenum();
     int eig_vals_typenum = eig_vals.get_typenum();
     auto const& dpctl_capi = dpctl::detail::dpctl_capi::get();
diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp
index 93be82d201d8..a4dded7543ab 100644
--- a/dpnp/backend/extensions/lapack/syevd.cpp
+++ b/dpnp/backend/extensions/lapack/syevd.cpp
@@ -44,14 +44,14 @@ namespace mkl_lapack = oneapi::mkl::lapack;
 namespace py = pybind11;
 
 template <typename T>
-static inline sycl::event call_syevd(sycl::queue exec_q,
-                                     const oneapi::mkl::job jobz,
-                                     const oneapi::mkl::uplo upper_lower,
-                                     const std::int64_t n,
-                                     T* a,
-                                     T* w,
-                                     std::vector<sycl::event> &host_task_events,
-                                     const std::vector<sycl::event>& depends)
+static sycl::event call_syevd(sycl::queue exec_q,
+                              const oneapi::mkl::job jobz,
+                              const oneapi::mkl::uplo upper_lower,
+                              const std::int64_t n,
+                              T* a,
+                              T* w,
+                              std::vector<sycl::event>& host_task_events,
+                              const std::vector<sycl::event>& depends)
 {
     validate_type_for_device<T>(exec_q);
 
@@ -170,6 +170,17 @@ std::pair<sycl::event, sycl::event> syevd(sycl::queue exec_q,
     //     throw py::value_error("Arrays index overlapping segments of memory");
     // }
 
+    bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
+    bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
+    if (!is_eig_vecs_f_contig)
+    {
+        throw py::value_error("An array with input matrix / ouput eigenvectors must be F-contiguous");
+    }
+    else if (!is_eig_vals_c_contig)
+    {
+        throw py::value_error("An array with output eigenvalues must be C-contiguous");
+    }
+
     int eig_vecs_typenum = eig_vecs.get_typenum();
     int eig_vals_typenum = eig_vals.get_typenum();
     auto const& dpctl_capi = dpctl::detail::dpctl_capi::get();
diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index 32a9ac7d5607..b7218b75d817 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -96,11 +96,8 @@ def dpnp_eigh(a, UPLO):
             # call LAPACK extension function to get eigenvalues and eigenvectors of a portion of matrix A
             ht_lapack_ev[i], _ = getattr(li, lapack_func)(a_sycl_queue, jobz, uplo, eig_vecs[i].get_array(), w[i].get_array(), depends=[copy_ev])
 
-            # TODO: remove once dpctl fix is available
-            ht_lapack_ev[i].wait()
-
         for i in range(op_count):
-            # ht_lapack_ev[i].wait()
+            ht_lapack_ev[i].wait()
             ht_copy_ev[i].wait()
 
         # combine the list of eigenvectors into a single array

From 4d27b4cf1631b1cca4c173287e3c2a2f646839b3 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Sat, 29 Apr 2023 14:25:53 +0200
Subject: [PATCH 094/129] Added array type check in dpnp.get_usm_ndarray()

---
 dpnp/dpnp_iface.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index f6aabbb1399d..ce3c540539d6 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -390,9 +390,18 @@ def get_usm_ndarray(a):
     out : usm_ndarray
         A dpctl USM ndarray of input array `a`.
 
+    Raises
+    ------
+    TypeError
+        If input parameter `a` is of unsupported array type.
+
     """
 
-    return a.get_array() if isinstance(a, dpnp_array) else a
+    if isinstance(a, dpnp_array):
+        return a.get_array()
+    if isinstance(a, dpt.usm_ndarray):
+        return a
+    raise TypeError("An array must be any of supported type, but got {}".format(type(a)))
 
 
 def is_supported_array_type(a):

From 25e6b9aef232363b5acc8ae4cf7f91cfd86237ff Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Fri, 5 May 2023 08:51:51 -0500
Subject: [PATCH 095/129] Fix compilation error with sycl::abs()

---
 dpnp/backend/kernels/dpnp_krnl_mathematical.cpp | 4 ++++
 dpnp/backend/src/dpnp_utils.hpp                 | 8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
index cbcd191fae64..b82cbb49b1a6 100644
--- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
@@ -170,10 +170,14 @@ DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref,
 
                 sycl::vec<_DataType_input, vec_sz> data_vec = sg.load<vec_sz>(input_ptrT(&array1[start]));
 
+#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_VECTOR_ABS_CHANGED)
                 // sycl::abs() returns unsigned integers only, so explicit casting to signed ones is required
                 using result_absT = typename cl::sycl::detail::make_unsigned<_DataType_output>::type;
                 sycl::vec<_DataType_output, vec_sz> res_vec =
                     dpnp_vec_cast<_DataType_output, result_absT, vec_sz>(sycl::abs(data_vec));
+#else
+                sycl::vec<_DataType_output, vec_sz> res_vec = sycl::abs(data_vec);
+#endif
 
                 sg.store<vec_sz>(result_ptrT(&result[start]), res_vec);
             }
diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp
index 985d5a61494e..6c1bda90cba7 100644
--- a/dpnp/backend/src/dpnp_utils.hpp
+++ b/dpnp/backend/src/dpnp_utils.hpp
@@ -40,6 +40,14 @@
     (__LIBSYCL_MAJOR_VERSION > major) || (__LIBSYCL_MAJOR_VERSION == major and __LIBSYCL_MINOR_VERSION > minor) ||     \
         (__LIBSYCL_MAJOR_VERSION == major and __LIBSYCL_MINOR_VERSION == minor and __LIBSYCL_PATCH_VERSION >= patch)
 
+/**
+ * Version of SYCL DPC++ 2023 compiler where a return type of sycl::abs() is changed
+ * from unsigned integer to signed one of input vector.
+ */
+#ifndef __SYCL_COMPILER_VECTOR_ABS_CHANGED
+#define __SYCL_COMPILER_VECTOR_ABS_CHANGED 20230503L
+#endif
+
 /**
  * Version of SYCL DPC++ 2023 compiler at which transition to SYCL 2020 occurs.
  * Intel(R) oneAPI DPC++ 2022.2.1 compiler has version 20221020L on Linux and

From 02d86b5db438f5390b8f71ffc4642a0980b6e21d Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 23 May 2023 14:17:23 +0200
Subject: [PATCH 096/129] Change branches for generate coverage push event
 (#1384)

---
 .github/workflows/generate_coverage.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index c60512fe3e30..6939681d3637 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -2,7 +2,7 @@ name: Generate coverage data for dpnp
 on:
   pull_request:
   push:
-    branches: [use-skbuild-and-cmake]
+    branches: [master]
 
 jobs:
   generate-coverage:

From 2e4a38949ee6dfe67ccc690f8b20e0568832f09f Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 24 May 2023 14:33:57 +0200
Subject: [PATCH 097/129] Update scripts for conda recipe (#1394)

* Pin DPC++ 2023.1.0

* Use current CMake version for PLATFORM_DIR variable

* Add CMAKE_PATH env variable
---
 conda-recipe/bld.bat   | 11 ++++++++++-
 conda-recipe/meta.yaml |  3 ++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat
index 5793f9f5d5e7..bef5d19d85fc 100644
--- a/conda-recipe/bld.bat
+++ b/conda-recipe/bld.bat
@@ -29,7 +29,16 @@ FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16) DO @(
   )
 )
 
-set "PLATFORM_DIR=%PREFIX%\Library\share\cmake-3.22\Modules\Platform"
+for /f "tokens=*" %%p in ('where cmake') do set CMAKE_PATH=%%p & goto :continue
+:continue
+for /f "tokens=3" %%a in ('%CMAKE_PATH% --version') do set CMAKE_VERSION=%%a & goto :continue
+:continue
+for /f "tokens=1-2 delims=." %%a in ("%CMAKE_VERSION%") do (
+    set CMAKE_VERSION_MAJOR=%%a
+    set CMAKE_VERSION_MINOR=%%b
+)
+
+set "PLATFORM_DIR=%PREFIX%\Library\share\cmake-%CMAKE_VERSION_MAJOR%.%CMAKE_VERSION_MINOR%\Modules\Platform"
 set "FN=Windows-IntelLLVM.cmake"
 
 rem Save the original file, and copy patched file to
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index bef7e00618c0..d03e8741c95d 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -21,7 +21,8 @@ requirements:
       - scikit-build
     build:
       - {{ compiler('cxx') }}
-      - {{ compiler('dpcpp') }}  >=2023.0  # [not osx]
+      - {{ compiler('dpcpp') }}  =2023.1.0  # [not osx]
+      - sysroot_linux-64 >=2.17  # [linux]
     run:
       - python
       - dpctl >=0.14.2

From de9a7b11e17b5964aa7776142d01b6a13080538c Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Tue, 23 May 2023 12:36:31 -0500
Subject: [PATCH 098/129] Pin to sysroot 2.28 or above

---
 .github/workflows/conda-package.yml | 5 ++---
 conda-recipe/meta.yaml              | 6 +++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 02e68ac8e82b..c4d84c6f621d 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -9,7 +9,7 @@ on:
 env:
   PACKAGE_NAME: dpnp
   MODULE_NAME: dpnp
-  CHANNELS: '-c dppy/label/dev -c intel -c main --override-channels'
+  CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels'
   TEST_SCOPE: >-
       test_arraycreation.py
       test_dot.py
@@ -59,7 +59,6 @@ jobs:
           python-version: ${{ matrix.python }}
           miniconda-version: 'latest'
           activate-environment: 'build'
-          use-only-tar-bz2: true
 
       - if: matrix.os == 'ubuntu-20.04'
         name: Store conda paths as envs on Linux
@@ -170,7 +169,7 @@ jobs:
       - name: Cache conda packages
         uses: actions/cache@v3.2.6
         env:
-          CACHE_NUMBER: 1  # Increase to reset cache
+          CACHE_NUMBER: 1 # Increase to reset cache
         with:
           path: ${{ env.conda-pkgs }}
           key:
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index d03e8741c95d..8574f4f9908d 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -14,15 +14,15 @@ requirements:
       - ninja
       - git
       - dpctl >=0.14.2
-      - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.0.0') }}
+      - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.1.0') }}
       - onedpl-devel
       - tbb-devel
       - wheel
       - scikit-build
     build:
       - {{ compiler('cxx') }}
-      - {{ compiler('dpcpp') }}  =2023.1.0  # [not osx]
-      - sysroot_linux-64 >=2.17  # [linux]
+      - {{ compiler('dpcpp') }}  >=2023.1.0  # [not osx]
+      - sysroot_linux-64 >=2.28 # [linux]
     run:
       - python
       - dpctl >=0.14.2

From 04bd709a5991c2b9ed29905427e1bbcb6048ef9b Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 24 May 2023 13:31:30 -0500
Subject: [PATCH 099/129] Extending workaround of ignoring per_kernel option to
 cmake 3.26

---
 dpnp/backend/CMakeLists.txt                   | 2 +-
 dpnp/backend/extensions/lapack/CMakeLists.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index 6fd13acd1128..8aeadc38c376 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -50,7 +50,7 @@ set(DPNP_SRC
 set(_trgt "dpnp_backend_c")
 
 if(WIN32)
-    if (${CMAKE_VERSION} VERSION_LESS "3.23")
+    if (${CMAKE_VERSION} VERSION_LESS "3.27")
         # this is a work-around for target_link_options inserting option after -link option, cause
         # linker to ignore it.
         set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel")
diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
index a32adaa431ff..8040fb433cb9 100644
--- a/dpnp/backend/extensions/lapack/CMakeLists.txt
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -32,7 +32,7 @@ pybind11_add_module(${python_module_name} MODULE
 )
 
 if (WIN32)
-    if (${CMAKE_VERSION} VERSION_LESS "3.23")
+    if (${CMAKE_VERSION} VERSION_LESS "3.27")
         # this is a work-around for target_link_options inserting option after -link option, cause
         # linker to ignore it.
         set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel")

From 3a03847a88824ed3737101899b48a5d08e8395f0 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 24 May 2023 14:37:48 -0500
Subject: [PATCH 100/129] Modify the code to patch Windows-IntelLLVM.cmake for
 different cmake versions

---
 ...LVM.cmake => Windows-IntelLLVM_3.22.cmake} |  0
 .../workflows/Windows-IntelLLVM_3.26.cmake    | 66 +++++++++++++++++++
 conda-recipe/bld.bat                          | 18 ++---
 3 files changed, 71 insertions(+), 13 deletions(-)
 rename .github/workflows/{Windows-IntelLLVM.cmake => Windows-IntelLLVM_3.22.cmake} (100%)
 mode change 100755 => 100644
 create mode 100644 .github/workflows/Windows-IntelLLVM_3.26.cmake

diff --git a/.github/workflows/Windows-IntelLLVM.cmake b/.github/workflows/Windows-IntelLLVM_3.22.cmake
old mode 100755
new mode 100644
similarity index 100%
rename from .github/workflows/Windows-IntelLLVM.cmake
rename to .github/workflows/Windows-IntelLLVM_3.22.cmake
diff --git a/.github/workflows/Windows-IntelLLVM_3.26.cmake b/.github/workflows/Windows-IntelLLVM_3.26.cmake
new file mode 100644
index 000000000000..eac3f0a9a827
--- /dev/null
+++ b/.github/workflows/Windows-IntelLLVM_3.26.cmake
@@ -0,0 +1,66 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+
+# This module is shared by multiple languages; use include blocker.
+if(__WINDOWS_INTEL_LLVM)
+  return()
+endif()
+set(__WINDOWS_INTEL_LLVM 1)
+
+# Platform/Windows-MSVC adds some linking options icx/ifx do not understand,
+# but that need to be passed to the linker.  Wrap all the linking options from
+# Platform/Windows-MSVC so that the compiler will hand them off to the linker
+# without interpreting them.
+
+# Save original CMAKE_${t}_LINKER_FLAGS_INIT
+foreach(t EXE SHARED MODULE STATIC)
+  set(_saved_cmake_${t}_linker_flags_init ${CMAKE_${t}_LINKER_FLAGS_INIT})
+  set(CMAKE_${t}_LINKER_FLAGS_INIT "")
+endforeach()
+include(Platform/Windows-MSVC)
+# Wrap linker flags from Windows-MSVC
+set(_IntelLLVM_LINKER_WRAPPER_FLAG "/Qoption,link,")
+set(_IntelLLVM_LINKER_WRAPPER_FLAG_SEP ",")
+foreach(t EXE SHARED MODULE STATIC)
+  set(_wrapped_linker_flags "")
+  foreach(flag ${CMAKE_${t}_LINKER_FLAGS_INIT})
+    string(STRIP ${flag} flag)
+    list(APPEND _wrapped_linker_flags "${_IntelLLVM_LINKER_WRAPPER_FLAG}${flag}")
+  endforeach()
+  set(CMAKE_${t}_LINKER_FLAGS_INIT "")
+  list(APPEND CMAKE_${t}_LINKER_FLAGS_INIT
+    ${_saved_cmake_${t}_linker_flags_init} ${_wrapped_linker_flags})
+endforeach()
+
+macro(__windows_compiler_intel lang)
+  __windows_compiler_msvc(${lang})
+
+  set(CMAKE_${lang}_LINKER_WRAPPER_FLAG "${_IntelLLVM_LINKER_WRAPPER_FLAG}")
+  set(CMAKE_${lang}_LINKER_WRAPPER_FLAG_SEP "${_IntelLLVM_LINKER_WRAPPER_FLAG_SEP}")
+  set(CMAKE_${lang}_CREATE_WIN32_EXE "${CMAKE_${lang}_LINKER_WRAPPER_FLAG}/subsystem:windows")
+  set(CMAKE_${lang}_CREATE_CONSOLE_EXE "${CMAKE_${lang}_LINKER_WRAPPER_FLAG}/subsystem:console")
+  set(CMAKE_LINK_DEF_FILE_FLAG "${CMAKE_${lang}_LINKER_WRAPPER_FLAG}/DEF:")
+  set(CMAKE_LIBRARY_PATH_FLAG "${CMAKE_${lang}_LINKER_WRAPPER_FLAG}/LIBPATH:")
+
+  # Features for LINK_LIBRARY generator expression
+  if(MSVC_VERSION GREATER "1900")
+    ## WHOLE_ARCHIVE: Force loading all members of an archive
+    set(CMAKE_LINK_LIBRARY_USING_WHOLE_ARCHIVE "LINKER:/WHOLEARCHIVE:<LIBRARY>")
+    set(CMAKE_LINK_LIBRARY_USING_WHOLE_ARCHIVE_SUPPORTED TRUE)
+  endif()
+
+  set(CMAKE_${lang}_LINK_EXECUTABLE
+    "${_CMAKE_VS_LINK_EXE}<CMAKE_${lang}_COMPILER> ${CMAKE_CL_NOLOGO} <CMAKE_${lang}_LINK_FLAGS> <OBJECTS> ${CMAKE_START_TEMP_FILE} <LINK_FLAGS> <LINK_LIBRARIES> /link /out:<TARGET> /implib:<TARGET_IMPLIB> /pdb:<TARGET_PDB> /version:<TARGET_VERSION_MAJOR>.<TARGET_VERSION_MINOR>${_PLATFORM_LINK_FLAGS} ${CMAKE_END_TEMP_FILE}")
+  set(CMAKE_${lang}_CREATE_SHARED_LIBRARY
+    "${_CMAKE_VS_LINK_DLL}<CMAKE_${lang}_COMPILER> ${CMAKE_CL_NOLOGO} <CMAKE_${lang}_LINK_FLAGS> <OBJECTS> ${CMAKE_START_TEMP_FILE} -LD <LINK_FLAGS> <LINK_LIBRARIES> -link /out:<TARGET> /implib:<TARGET_IMPLIB> /pdb:<TARGET_PDB> /version:<TARGET_VERSION_MAJOR>.<TARGET_VERSION_MINOR>${_PLATFORM_LINK_FLAGS} ${CMAKE_END_TEMP_FILE}")
+  set(CMAKE_${lang}_CREATE_SHARED_MODULE ${CMAKE_${lang}_CREATE_SHARED_LIBRARY})
+  if (NOT "${lang}" STREQUAL "Fortran" OR CMAKE_${lang}_COMPILER_VERSION VERSION_GREATER_EQUAL 2022.1)
+    # The Fortran driver does not support -fuse-ld=llvm-lib before compiler version 2022.1
+    set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
+      "<CMAKE_${lang}_COMPILER> ${CMAKE_CL_NOLOGO} <CMAKE_${lang}_LINK_FLAGS> <OBJECTS> ${CMAKE_START_TEMP_FILE} -fuse-ld=llvm-lib -o <TARGET> <LINK_FLAGS> <LINK_LIBRARIES> ${CMAKE_END_TEMP_FILE}")
+  endif()
+
+  set(CMAKE_DEPFILE_FLAGS_${lang} "-QMD -QMT <DEP_TARGET> -QMF <DEP_FILE>")
+  set(CMAKE_${lang}_DEPFILE_FORMAT gcc)
+endmacro()
diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat
index bef5d19d85fc..9398b115175b 100644
--- a/conda-recipe/bld.bat
+++ b/conda-recipe/bld.bat
@@ -22,23 +22,15 @@ set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:P
 set "SKBUILD_ARGS=%SKBUILD_ARGS% -DDPCTL_MODULE_PATH:PATH=%DPCTL_CMAKE_DIR% "
 set "SKBUILD_ARGS=%SKBUILD_ARGS% -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"
 
-FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16) DO @(
+FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16 17.0.0 17) DO @(
   REM set DIR_HINT if directory exists
   IF EXIST "%BUILD_PREFIX%\Library\lib\clang\%%V\" (
      SET "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\%%V"
   )
 )
 
-for /f "tokens=*" %%p in ('where cmake') do set CMAKE_PATH=%%p & goto :continue
-:continue
-for /f "tokens=3" %%a in ('%CMAKE_PATH% --version') do set CMAKE_VERSION=%%a & goto :continue
-:continue
-for /f "tokens=1-2 delims=." %%a in ("%CMAKE_VERSION%") do (
-    set CMAKE_VERSION_MAJOR=%%a
-    set CMAKE_VERSION_MINOR=%%b
-)
-
-set "PLATFORM_DIR=%PREFIX%\Library\share\cmake-%CMAKE_VERSION_MAJOR%.%CMAKE_VERSION_MINOR%\Modules\Platform"
+set "PATCHED_CMAKE_VERSION=3.26"
+set "PLATFORM_DIR=%PREFIX%\Library\share\cmake-%PATCHED_CMAKE_VERSION%\Modules\Platform"
 set "FN=Windows-IntelLLVM.cmake"
 
 rem Save the original file, and copy patched file to
@@ -47,7 +39,7 @@ if EXIST "%PLATFORM_DIR%" (
   dir "%PLATFORM_DIR%\%FN%"
   copy /Y "%PLATFORM_DIR%\%FN%" .
   if errorlevel 1 exit 1
-  copy /Y .github\workflows\Windows-IntelLLVM.cmake "%PLATFORM_DIR%"
+  copy /Y ".github\workflows\Windows-IntelLLVM_%PATCHED_CMAKE_VERSION%.cmake" "%PLATFORM_DIR%\%FN%"
   if errorlevel 1 exit 1
 )
 
@@ -65,6 +57,6 @@ if NOT "%WHEELS_OUTPUT_FOLDER%"=="" (
 
 rem copy back
 if EXIST "%PLATFORM_DIR%" (
-   copy /Y "%FN%" "%PLATFORM_DIR%"
+   copy /Y "%FN%" "%PLATFORM_DIR%\%FN%"
    if errorlevel 1 exit 1
 )

From 4674624b7e0fde6447f1d7cc9f8110cca83ac6fa Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 22 May 2023 06:58:02 -0500
Subject: [PATCH 101/129] Reuse OneDPL implementation of std::nth_element() for
 partition of 1D array

---
 dpnp/backend/kernels/dpnp_krnl_sorting.cpp | 25 +++++++++-
 dpnp/backend/src/dpnp_fptr.hpp             | 53 ++++++++++++++++++++++
 tests/test_sort.py                         | 20 ++++----
 3 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp
index 614bb94f0705..01bc26cdf8f0 100644
--- a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -160,6 +160,24 @@ DPCTLSyclEventRef dpnp_partition_c(DPCTLSyclQueueRef q_ref,
 
     sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
 
+    if (ndim == 1) // 1d array with C-contiguous data
+    {
+        _DataType* arr = static_cast<_DataType*>(array1_in);
+        _DataType* result = static_cast<_DataType*>(result1);
+
+        auto policy = oneapi::dpl::execution::make_device_policy<dpnp_partition_c_kernel<_DataType>>(q);
+
+        // fill the result array with data from input one
+        q.memcpy(result, arr, size * sizeof(_DataType)).wait();
+
+        // make a partial sorting such that:
+        // 1. result[0 <= i < kth]    <= result[kth]
+        // 2. result[kth <= i < size] >= result[kth]
+        // event-blocking call, no need for wait()
+        std::nth_element(policy, result, result + kth, result + size, dpnp_less_comp());
+        return event_ref;
+    }
+
     DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size, true);
     DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, array2_in, size, true);
     DPNPC_ptr_adapter<_DataType> result1_ptr(q_ref, result1, size, true, true);
@@ -181,7 +199,7 @@ DPCTLSyclEventRef dpnp_partition_c(DPCTLSyclQueueRef q_ref,
             size_t ind = j - ind_begin;
             matrix[ind] = arr2[j];
         }
-        std::partial_sort(matrix, matrix + shape_[ndim - 1], matrix + shape_[ndim - 1]);
+        std::partial_sort(matrix, matrix + shape_[ndim - 1], matrix + shape_[ndim - 1], dpnp_less_comp());
         for (size_t j = ind_begin; j < ind_end + 1; ++j)
         {
             size_t ind = j - ind_begin;
@@ -492,10 +510,13 @@ void func_map_init_sorting(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_PARTITION][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_partition_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_PARTITION][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_partition_default_c<double>};
 
+    fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_partition_ext_c<bool>};
     fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_partition_ext_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_partition_ext_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_partition_ext_c<float>};
     fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_partition_ext_c<double>};
+    fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_C64][eft_C64] = {eft_C64, (void*)dpnp_partition_ext_c<std::complex<float>>};
+    fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_C128][eft_C128] = {eft_C128, (void*)dpnp_partition_ext_c<std::complex<double>>};
 
     fmap[DPNPFuncName::DPNP_FN_SEARCHSORTED][eft_INT][eft_INT] = {
         eft_INT, (void*)dpnp_searchsorted_default_c<int32_t, int64_t>};
diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp
index 9f8c102bca5e..cb33e70185e2 100644
--- a/dpnp/backend/src/dpnp_fptr.hpp
+++ b/dpnp/backend/src/dpnp_fptr.hpp
@@ -187,6 +187,59 @@ constexpr auto both_types_are_any_of = std::conjunction_v<is_any<T1, Ts...>, is_
 template <typename T1, typename T2, typename... Ts>
 constexpr auto none_of_both_types = !std::disjunction_v<is_any<T1, Ts...>, is_any<T2, Ts...>>;
 
+
+/**
+ * @brief If the type _Tp is a reference type, provides the member typedef type which is the type referred to by _Tp
+ * with its topmost cv-qualifiers removed. Otherwise type is _Tp with its topmost cv-qualifiers removed.
+ *
+ * @note std::remove_cvref is only available since c++20
+ */
+template<typename _Tp>
+using dpnp_remove_cvref_t = typename std::remove_cv_t<typename std::remove_reference_t<_Tp>>;
+
+
+/**
+ * @brief "<" comparison with complex types support.
+ *
+ * @note return a result of lexicographical "<" comparison for complex types.
+ */
+class dpnp_less_comp
+{
+public:
+    template <typename _Xp, typename _Yp>
+    bool operator()(_Xp&& __x, _Yp&& __y) const
+    {
+        if constexpr (both_types_are_same<dpnp_remove_cvref_t<_Xp>, dpnp_remove_cvref_t<_Yp>, std::complex<float>, std::complex<double>>)
+        {
+            bool ret = false;
+            _Xp a = std::forward<_Xp>(__x);
+            _Yp b = std::forward<_Yp>(__y);
+
+            if (a.real() < b.real())
+            {
+                ret = (a.imag() == a.imag() || b.imag() != b.imag());
+            }
+            else if (a.real() > b.real())
+            {
+                ret = (b.imag() != b.imag() && a.imag() == a.imag());
+            }
+            else if (a.real() == b.real() || (a.real() != a.real() && b.real() != b.real()))
+            {
+                ret = (a.imag() < b.imag() || (b.imag() != b.imag() && a.imag() == a.imag()));
+            }
+            else
+            {
+                ret = (b.real() != b.real());
+            }
+            return ret;
+        }
+        else
+        {
+            return std::forward<_Xp>(__x) < std::forward<_Yp>(__y);
+        }
+    }
+};
+
 /**
  * FPTR interface initialization functions
  */
diff --git a/tests/test_sort.py b/tests/test_sort.py
index aa633c0c3ad9..975c654cbb94 100644
--- a/tests/test_sort.py
+++ b/tests/test_sort.py
@@ -1,16 +1,18 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 
 import numpy
+from numpy.testing import (
+    assert_array_equal
+)
 
 
 @pytest.mark.parametrize("kth",
                          [0, 1],
                          ids=['0', '1'])
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
 @pytest.mark.parametrize("array",
                          [[3, 4, 2, 1],
                           [[1, 0], [3, 0]],
@@ -25,11 +27,11 @@
                               '[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]',
                               '[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]'])
 def test_partition(array, dtype, kth):
-    a = numpy.array(array, dtype)
-    ia = dpnp.array(array, dtype)
-    expected = numpy.partition(a, kth)
-    result = dpnp.partition(ia, kth)
-    numpy.testing.assert_array_equal(expected, result)
+    a = dpnp.array(array, dtype)
+    p = dpnp.partition(a, kth)
+
+    assert (p[0:kth] <= p[kth]).all()
+    assert (p[kth] <= p[kth + 1:]).all()
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
@@ -77,4 +79,4 @@ def test_searchsorted(array, dtype, v_, side):
     iv = dpnp.array(v_, dtype)
     expected = numpy.searchsorted(a, v, side=side)
     result = dpnp.searchsorted(ia, iv, side=side)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)

From f4c89860cee12fe82d6a79fad3c2e7dd40debff8 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 22 May 2023 16:34:47 -0500
Subject: [PATCH 102/129] Add complex type in dpnp.copy() and tune tests

---
 .github/workflows/conda-package.yml         | 1 +
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 4 ++--
 tests/skipped_tests.tbl                     | 2 --
 tests/skipped_tests_gpu.tbl                 | 1 -
 tests/test_sort.py                          | 7 +++++--
 5 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index c4d84c6f621d..83173e6d9673 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -18,6 +18,7 @@ env:
       test_linalg.py
       test_mathematical.py
       test_random_state.py
+      test_sort.py
       test_special.py
       test_usm_type.py
   VER_JSON_NAME: 'version.json'
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index 50440cdebc81..30310162582f 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -811,8 +811,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_copy_c_ext<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_copy_c_ext<float>};
     fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_copy_c_ext<double>};
-    fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_C128][eft_C128] = {eft_C128,
-                                                                (void*)dpnp_copy_c_ext<std::complex<double>>};
+    fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_C64][eft_C64] = {eft_C64, (void*)dpnp_copy_c_ext<std::complex<float>>};
+    fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_C128][eft_C128] = {eft_C128, (void*)dpnp_copy_c_ext<std::complex<double>>};
 
     fmap[DPNPFuncName::DPNP_FN_ERF][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_erf_c_default<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_ERF][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_erf_c_default<int64_t>};
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index a41b881ae7b3..707c0b897244 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -202,8 +202,6 @@ tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)]
 
-tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1]
-
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 36b17d5edbc3..1f50dfd5b506 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -349,7 +349,6 @@ tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int32]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp.asarray([(i, i) for i in x], [("a", int), ("b", int)]).view(dpnp.recarray))]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)]
-tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1]
 
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle
 tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag
diff --git a/tests/test_sort.py b/tests/test_sort.py
index 975c654cbb94..cfcef2c3db05 100644
--- a/tests/test_sort.py
+++ b/tests/test_sort.py
@@ -30,8 +30,11 @@ def test_partition(array, dtype, kth):
     a = dpnp.array(array, dtype)
     p = dpnp.partition(a, kth)
 
-    assert (p[0:kth] <= p[kth]).all()
-    assert (p[kth] <= p[kth + 1:]).all()
+    # TODO: remove once dpnp.less_equal() support complex types
+    p = p.asnumpy()
+
+    assert (p[..., 0:kth] <= p[..., kth:kth + 1]).all()
+    assert (p[..., kth:kth + 1] <= p[..., kth + 1:]).all()
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")

From 5c67ba313fd9eca4a8a2db648d787e4f7f9d126b Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Tue, 23 May 2023 08:40:56 -0500
Subject: [PATCH 103/129] Added dpnp.ndarray.partition() & unmuted cupy tests

---
 .github/workflows/conda-package.yml           |  1 +
 dpnp/dpnp_array.py                            | 27 ++++++++-
 tests/skipped_tests.tbl                       | 38 +-----------
 tests/skipped_tests_gpu.tbl                   | 59 +------------------
 .../cupy/sorting_tests/test_sort.py           | 24 +++-----
 5 files changed, 40 insertions(+), 109 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 83173e6d9673..e42adbdc913d 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -21,6 +21,7 @@ env:
       test_sort.py
       test_special.py
       test_usm_type.py
+      third_party/cupy/sorting_tests/test_sort.py
   VER_JSON_NAME: 'version.json'
   VER_SCRIPT1: "import json; f = open('version.json', 'r'); j = json.load(f); f.close(); "
   VER_SCRIPT2: "d = j['dpnp'][0]; print('='.join((d[s] for s in ('version', 'build'))))"
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 5741ea0fa145..3a3d4027d787 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -832,7 +832,32 @@ def ndim(self):
     def nonzero(self):
         return dpnp.nonzero(self)
 
- # 'partition',
+    def partition(self, kth, axis=-1, kind='introselect', order=None):
+        """
+        Rearranges the elements in the array in such a way that the value of the
+        element in kth position is in the position it would be in a sorted array.
+
+        All elements smaller than the kth element are moved before this element and
+        all equal or greater are moved behind it. The ordering of the elements in
+        the two partitions is undefined.
+
+        Refer to `dpnp.partition` for full documentation.
+
+        See Also
+        --------
+        :obj:`dpnp.partition` : Return a partitioned copy of an array.
+
+        Examples
+        --------
+        >>> import dpnp as np
+        >>> a = np.array([3, 4, 2, 1])
+        >>> a.partition(3)
+        >>> a
+        array([1, 2, 3, 4])
+
+        """
+
+        self._array_obj = dpnp.partition(self, kth, axis=axis, kind=kind, order=order).get_array()
 
     def prod(self, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):
         """
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 707c0b897244..81c6ea15419f 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -1179,18 +1179,7 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_multi_dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_one_dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis1
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis2
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis1
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis2
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis1
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis2
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis1
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis2
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_kth
+
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_F_order
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_dtype
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_three_or_more_dim
@@ -1200,30 +1189,7 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_nan3
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_view
 tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_multi_dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_multi_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_non_contiguous
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_sequence_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_zero_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_multi_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_non_contiguous
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_sequence_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_zero_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_multi_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_zero_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_multi_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_zero_dim
+
 tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_1dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_nan
 tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_ndim
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 1f50dfd5b506..b99492832ff9 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -257,28 +257,9 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMult
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_2_{d=4, shape=(4, 3, 2)}::test_normal
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_3_{d=4, shape=(3, 2)}::test_normal
 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis1
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis2
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis1
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis2
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis1
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis2
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis1
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis2
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_negative_axis
 tests/third_party/cupy/statistics_tests/test_correlation.py::TestCov::test_cov_empty
 tests/third_party/cupy/statistics_tests/test_meanvar.py::TestMeanVar::test_external_mean_axis
 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_negative_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_none_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_sequence_kth
-
 tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory
 tests/test_arraymanipulation.py::TestHstack::test_generator
 tests/test_arraymanipulation.py::TestVstack::test_generator
@@ -1299,18 +1280,7 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_multi_dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_one_dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}
+
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_F_order
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_dtype
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_three_or_more_dim
@@ -1320,32 +1290,7 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_nan3
 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_view
 tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_multi_dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_multi_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_non_contiguous
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_sequence_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_zero_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_multi_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_axis
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_non_contiguous
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_sequence_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_zero_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_multi_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_non_contiguous
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_zero_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_multi_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_negative_kth
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_non_contiguous
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_one_dim
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_zero_dim
+
 tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_1dim
 tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_nan
 tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_ndim
diff --git a/tests/third_party/cupy/sorting_tests/test_sort.py b/tests/third_party/cupy/sorting_tests/test_sort.py
index f26d13cf537b..eb3cc8a1a803 100644
--- a/tests/third_party/cupy/sorting_tests/test_sort.py
+++ b/tests/third_party/cupy/sorting_tests/test_sort.py
@@ -469,7 +469,6 @@ def test_sort_complex_nan(self, xp, dtype):
     'length': [10, 20000],
 }))
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@testing.gpu
 class TestPartition(unittest.TestCase):
 
     def partition(self, a, kth, axis=-1):
@@ -495,18 +494,19 @@ def test_partition_one_dim(self, xp, dtype):
         a = testing.shaped_random((self.length,), xp, dtype)
         kth = 2
         x = self.partition(a, kth)
-        self.assertTrue(xp.all(x[0:kth] <= x[kth:kth + 1]))
-        self.assertTrue(xp.all(x[kth:kth + 1] <= x[kth + 1:]))
+        assert xp.all(x[0:kth] <= x[kth:kth + 1])
+        assert xp.all(x[kth:kth + 1] <= x[kth + 1:])
         return x[kth]
 
+    @pytest.mark.skip("multidimensional case doesn't work properly")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_array_equal()
     def test_partition_multi_dim(self, xp, dtype):
         a = testing.shaped_random((10, 10, self.length), xp, dtype)
         kth = 2
         x = self.partition(a, kth)
-        self.assertTrue(xp.all(x[:, :, 0:kth] <= x[:, :, kth:kth + 1]))
-        self.assertTrue(xp.all(x[:, :, kth:kth + 1] <= x[:, :, kth + 1:]))
+        assert xp.all(x[:, :, 0:kth] <= x[:, :, kth:kth + 1])
+        assert xp.all(x[:, :, kth:kth + 1] <= x[:, :, kth + 1:])
         return x[:, :, kth:kth + 1]
 
     # Test non-contiguous array
@@ -515,16 +515,10 @@ def test_partition_multi_dim(self, xp, dtype):
     def test_partition_non_contiguous(self, xp):
         a = testing.shaped_random((self.length,), xp)[::-1]
         kth = 2
-        if not self.external:
-            if xp is cupy:
-                with self.assertRaises(NotImplementedError):
-                    return self.partition(a, kth)
-            return 0  # dummy
-        else:
-            x = self.partition(a, kth)
-            self.assertTrue(xp.all(x[0:kth] <= x[kth:kth + 1]))
-            self.assertTrue(xp.all(x[kth:kth + 1] <= x[kth + 1:]))
-            return x[kth]
+        x = self.partition(a, kth)
+        assert xp.all(x[0:kth] <= x[kth:kth + 1])
+        assert xp.all(x[kth:kth + 1] <= x[kth + 1:])
+        return x[kth]
 
     # Test kth
 

From 340ec405e44d9da1811abb018a5c11380c5d470f Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 31 May 2023 14:40:13 +0200
Subject: [PATCH 104/129] Fix generation of coverage in GitHub Action (#1397)

* Update skipped_tests.tbl
* Use pip install coveralls with pinned version 3.2.0
---
 .github/workflows/generate_coverage.yaml |  2 +-
 tests/skipped_tests.tbl                  | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 6939681d3637..4d054274502f 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -54,7 +54,7 @@ jobs:
       - name: Install coverall dependencies
         run: |
           sudo gem install coveralls-lcov
-          conda install coveralls
+          pip install coveralls==3.2.0
       - name: Upload coverage data to coveralls.io
         run: |
           echo "Processing pytest-coverage"
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 81c6ea15419f..68bc5c8bc1dc 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -267,6 +267,8 @@ tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction:
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_multiple_axes
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_multiple_axes_keepdims
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan
+tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan_imag
+tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan_real
 
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestCubReduction_param_0_{order='C', shape=(10,)}::test_cub_max
 tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestCubReduction_param_0_{order='C', shape=(10,)}::test_cub_min
@@ -484,11 +486,14 @@ tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_1D_choicelist
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_choicelist_condlist_broadcast
+tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_complex
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default
+tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default_complex
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default_scalar
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_empty_lists
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_length_error
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_broadcastable
+tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_broadcastable_complex
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_non_broadcastable
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_type_error_condlist
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_diagonal
@@ -829,9 +834,14 @@ tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_5_{de
 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_5_{decimals=99}::test_round_small
 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_6_{decimals=100}::test_round_large
 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_6_{decimals=100}::test_round_small
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_around
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_ceil
 tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_fix
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_floor
 tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint
 tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint_negative
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_round_
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_trunc
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all2
 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_keepdims
@@ -1283,6 +1293,8 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_b
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_array_bins
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_bins_not_ordered
+tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights
+tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_density
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_empty
 tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights

From 30dedf54b2001f32c01c52b223c6e74cbe0479c2 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Tue, 2 May 2023 04:48:47 -0500
Subject: [PATCH 105/129] Add type dispatching to pybind11 extension of
 dpnp.linalg.eigh()

---
 dpnp/backend/extensions/lapack/CMakeLists.txt |   1 +
 dpnp/backend/extensions/lapack/heevd.cpp      |  99 ++++++++++------
 dpnp/backend/extensions/lapack/heevd.hpp      |   2 +
 dpnp/backend/extensions/lapack/lapack_py.cpp  |  32 +++++-
 dpnp/backend/extensions/lapack/syevd.cpp      | 107 ++++++++++++------
 dpnp/backend/extensions/lapack/syevd.hpp      |   2 +
 .../extensions/lapack/types_matrix.hpp        |  88 ++++++++++++++
 7 files changed, 255 insertions(+), 76 deletions(-)
 create mode 100644 dpnp/backend/extensions/lapack/types_matrix.hpp

diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
index 8040fb433cb9..cfeb753168e9 100644
--- a/dpnp/backend/extensions/lapack/CMakeLists.txt
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -45,6 +45,7 @@ target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
 
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}/../tensor/libtensor/include)
 
 if (WIN32)
   target_compile_options(${python_module_name} PRIVATE
diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
index 8c943646ff0a..bfb18697a56e 100644
--- a/dpnp/backend/extensions/lapack/heevd.cpp
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -26,7 +26,13 @@
 
 #include <pybind11/pybind11.h>
 
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
 #include "heevd.hpp"
+#include "types_matrix.hpp"
 
 #include "dpnp_utils.hpp"
 
@@ -42,19 +48,35 @@ namespace lapack
 
 namespace mkl_lapack = oneapi::mkl::lapack;
 namespace py = pybind11;
+namespace type_dispatch = dpctl::tensor::type_dispatch;
+namespace type_utils = dpctl::tensor::type_utils;
+
+typedef sycl::event (*heevd_impl_fn_ptr_t)(sycl::queue,
+                                           const oneapi::mkl::job,
+                                           const oneapi::mkl::uplo,
+                                           const std::int64_t,
+                                           char*,
+                                           char*,
+                                           std::vector<sycl::event>&,
+                                           const std::vector<sycl::event>&);
+
+static heevd_impl_fn_ptr_t heevd_dispatch_table[type_dispatch::num_types][type_dispatch::num_types];
 
 template <typename T, typename RealT>
-static sycl::event call_heevd(sycl::queue exec_q,
+static sycl::event heevd_impl(sycl::queue exec_q,
                               const oneapi::mkl::job jobz,
                               const oneapi::mkl::uplo upper_lower,
                               const std::int64_t n,
-                              T* a,
-                              RealT* w,
+                              char* in_a,
+                              char* out_w,
                               std::vector<sycl::event>& host_task_events,
                               const std::vector<sycl::event>& depends)
 {
-    validate_type_for_device<T>(exec_q);
-    validate_type_for_device<RealT>(exec_q);
+    type_utils::validate_type_for_device<T>(exec_q);
+    type_utils::validate_type_for_device<RealT>(exec_q);
+
+    T* a = reinterpret_cast<T*>(in_a);
+    RealT* w = reinterpret_cast<RealT*>(out_w);
 
     const std::int64_t lda = std::max<size_t>(1UL, n);
     const std::int64_t scratchpad_size = mkl_lapack::heevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
@@ -163,13 +185,11 @@ std::pair<sycl::event, sycl::event> heevd(sycl::queue exec_q,
         throw py::value_error("Execution queue is not compatible with allocation queues");
     }
 
-    // check that arrays do not overlap, and concurrent access is safe.
-    // TODO: need to be exposed by DPCTL headers
-    // auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
-    // if (overlap(eig_vecs, eig_vals))
-    // {
-    //     throw py::value_error("Arrays index overlapping segments of memory");
-    // }
+    auto const& overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(eig_vecs, eig_vals))
+    {
+        throw py::value_error("Arrays with eigenvectors and eigenvalues are overlapping segments of memory");
+    }
 
     bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
     bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
@@ -182,38 +202,51 @@ std::pair<sycl::event, sycl::event> heevd(sycl::queue exec_q,
         throw py::value_error("An array with output eigenvalues must be C-contiguous");
     }
 
-    int eig_vecs_typenum = eig_vecs.get_typenum();
-    int eig_vals_typenum = eig_vals.get_typenum();
-    auto const& dpctl_capi = dpctl::detail::dpctl_capi::get();
+    auto array_types = type_dispatch::usm_ndarray_types();
+    int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
+    int eig_vals_type_id = array_types.typenum_to_lookup_id(eig_vals.get_typenum());
 
-    sycl::event heevd_ev;
-    std::vector<sycl::event> host_task_events;
+    heevd_impl_fn_ptr_t heevd_fn = heevd_dispatch_table[eig_vecs_type_id][eig_vals_type_id];
+    if (heevd_fn == nullptr)
+    {
+        throw py::value_error("No heevd implementation defined for a pair of type for eigenvectors and eigenvalues");
+    }
+
+    char* eig_vecs_data = eig_vecs.get_data();
+    char* eig_vals_data = eig_vals.get_data();
 
     const std::int64_t n = eig_vecs_shape[0];
     const oneapi::mkl::job jobz_val = static_cast<oneapi::mkl::job>(jobz);
     const oneapi::mkl::uplo uplo_val = static_cast<oneapi::mkl::uplo>(upper_lower);
 
-    if ((eig_vecs_typenum == dpctl_capi.UAR_CDOUBLE_) && (eig_vals_typenum == dpctl_capi.UAR_DOUBLE_))
-    {
-        std::complex<double>* a = reinterpret_cast<std::complex<double>*>(eig_vecs.get_data());
-        double* w = reinterpret_cast<double*>(eig_vals.get_data());
+    std::vector<sycl::event> host_task_events;
+    sycl::event heevd_ev =
+        heevd_fn(exec_q, jobz_val, uplo_val, n, eig_vecs_data, eig_vals_data, host_task_events, depends);
 
-        heevd_ev = call_heevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
-    }
-    else if ((eig_vecs_typenum == dpctl_capi.UAR_CFLOAT_) && (eig_vals_typenum == dpctl_capi.UAR_FLOAT_))
-    {
-        std::complex<float>* a = reinterpret_cast<std::complex<float>*>(eig_vecs.get_data());
-        float* w = reinterpret_cast<float*>(eig_vals.get_data());
+    sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events);
+    return std::make_pair(args_ev, heevd_ev);
+}
 
-        heevd_ev = call_heevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
-    }
-    else
+template <typename fnT, typename T, typename RealT>
+struct HeevdContigFactory
+{
+    fnT get()
     {
-        throw py::value_error("Unexpected types of either eigenvectors or eigenvalues");
+        if constexpr (types::HeevdTypePairSupportFactory<T, RealT>::is_defined)
+        {
+            return heevd_impl<T, RealT>;
+        }
+        else
+        {
+            return nullptr;
+        }
     }
+};
 
-    sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events);
-    return std::make_pair(args_ev, heevd_ev);
+void init_heevd_dispatch_table(void)
+{
+    type_dispatch::DispatchTableBuilder<heevd_impl_fn_ptr_t, HeevdContigFactory, type_dispatch::num_types> contig;
+    contig.populate_dispatch_table(heevd_dispatch_table);
 }
 }
 }
diff --git a/dpnp/backend/extensions/lapack/heevd.hpp b/dpnp/backend/extensions/lapack/heevd.hpp
index 93ce6fe560e1..85696d147f66 100644
--- a/dpnp/backend/extensions/lapack/heevd.hpp
+++ b/dpnp/backend/extensions/lapack/heevd.hpp
@@ -45,6 +45,8 @@ namespace lapack
                                                      dpctl::tensor::usm_ndarray eig_vecs,
                                                      dpctl::tensor::usm_ndarray eig_vals,
                                                      const std::vector<sycl::event>& depends);
+
+    extern void init_heevd_dispatch_table(void);
 }
 }
 }
diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp
index ea7506308032..eaa3e6873b6a 100644
--- a/dpnp/backend/extensions/lapack/lapack_py.cpp
+++ b/dpnp/backend/extensions/lapack/lapack_py.cpp
@@ -33,25 +33,45 @@
 #include "heevd.hpp"
 #include "syevd.hpp"
 
+namespace lapack_ext = dpnp::backend::ext::lapack;
 namespace py = pybind11;
 
+// populate dispatch vectors
+void init_dispatch_vectors(void)
+{
+    lapack_ext::init_syevd_dispatch_vector();
+}
+
+// populate dispatch tables
+void init_dispatch_tables(void)
+{
+    lapack_ext::init_heevd_dispatch_table();
+}
+
 PYBIND11_MODULE(_lapack_impl, m)
 {
+    init_dispatch_vectors();
+    init_dispatch_tables();
+
     m.def("_heevd",
-          &dpnp::backend::ext::lapack::heevd,
+          &lapack_ext::heevd,
           "Call `heevd` from OneMKL LAPACK library to return "
           "the eigenvalues and eigenvectors of a complex Hermitian matrix",
           py::arg("sycl_queue"),
-          py::arg("jobz"), py::arg("upper_lower"),
-          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("jobz"),
+          py::arg("upper_lower"),
+          py::arg("eig_vecs"),
+          py::arg("eig_vals"),
           py::arg("depends") = py::list());
 
     m.def("_syevd",
-          &dpnp::backend::ext::lapack::syevd,
+          &lapack_ext::syevd,
           "Call `syevd` from OneMKL LAPACK library to return "
           "the eigenvalues and eigenvectors of a real symmetric matrix",
           py::arg("sycl_queue"),
-          py::arg("jobz"), py::arg("upper_lower"),
-          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("jobz"),
+          py::arg("upper_lower"),
+          py::arg("eig_vecs"),
+          py::arg("eig_vals"),
           py::arg("depends") = py::list());
 }
diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp
index a4dded7543ab..4e4ce0001a32 100644
--- a/dpnp/backend/extensions/lapack/syevd.cpp
+++ b/dpnp/backend/extensions/lapack/syevd.cpp
@@ -26,7 +26,13 @@
 
 #include <pybind11/pybind11.h>
 
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
 #include "syevd.hpp"
+#include "types_matrix.hpp"
 
 #include "dpnp_utils.hpp"
 
@@ -42,18 +48,34 @@ namespace lapack
 
 namespace mkl_lapack = oneapi::mkl::lapack;
 namespace py = pybind11;
+namespace type_dispatch = dpctl::tensor::type_dispatch;
+namespace type_utils = dpctl::tensor::type_utils;
+
+typedef sycl::event (*syevd_impl_fn_ptr_t)(sycl::queue,
+                                           const oneapi::mkl::job,
+                                           const oneapi::mkl::uplo,
+                                           const std::int64_t,
+                                           char*,
+                                           char*,
+                                           std::vector<sycl::event>&,
+                                           const std::vector<sycl::event>&);
+
+static syevd_impl_fn_ptr_t syevd_dispatch_vector[type_dispatch::num_types];
 
 template <typename T>
-static sycl::event call_syevd(sycl::queue exec_q,
+static sycl::event syevd_impl(sycl::queue exec_q,
                               const oneapi::mkl::job jobz,
                               const oneapi::mkl::uplo upper_lower,
                               const std::int64_t n,
-                              T* a,
-                              T* w,
+                              char* in_a,
+                              char* out_w,
                               std::vector<sycl::event>& host_task_events,
                               const std::vector<sycl::event>& depends)
 {
-    validate_type_for_device<T>(exec_q);
+    type_utils::validate_type_for_device<T>(exec_q);
+
+    T* a = reinterpret_cast<T*>(in_a);
+    T* w = reinterpret_cast<T*>(out_w);
 
     const std::int64_t lda = std::max<size_t>(1UL, n);
     const std::int64_t scratchpad_size = mkl_lapack::syevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
@@ -162,13 +184,11 @@ std::pair<sycl::event, sycl::event> syevd(sycl::queue exec_q,
         throw py::value_error("Execution queue is not compatible with allocation queues");
     }
 
-    // check that arrays do not overlap, and concurrent access is safe.
-    // TODO: need to be exposed by DPCTL headers
-    // auto const& overlap = dpctl::tensor::overlap::MemoryOverlap();
-    // if (overlap(eig_vecs, eig_vals))
-    // {
-    //     throw py::value_error("Arrays index overlapping segments of memory");
-    // }
+    auto const& overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(eig_vecs, eig_vals))
+    {
+        throw py::value_error("Arrays with eigenvectors and eigenvalues are overlapping segments of memory");
+    }
 
     bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
     bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
@@ -181,43 +201,56 @@ std::pair<sycl::event, sycl::event> syevd(sycl::queue exec_q,
         throw py::value_error("An array with output eigenvalues must be C-contiguous");
     }
 
-    int eig_vecs_typenum = eig_vecs.get_typenum();
-    int eig_vals_typenum = eig_vals.get_typenum();
-    auto const& dpctl_capi = dpctl::detail::dpctl_capi::get();
+    auto array_types = type_dispatch::usm_ndarray_types();
+    int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
+    int eig_vals_type_id = array_types.typenum_to_lookup_id(eig_vals.get_typenum());
 
-    sycl::event syevd_ev;
-    std::vector<sycl::event> host_task_events;
+    if (eig_vecs_type_id != eig_vals_type_id)
+    {
+        throw py::value_error("Types of eigenvectors and eigenvalues are missmatched");
+    }
+
+    syevd_impl_fn_ptr_t syevd_fn = syevd_dispatch_vector[eig_vecs_type_id];
+    if (syevd_fn == nullptr)
+    {
+        throw py::value_error("No syevd implementation defined for a type of eigenvectors and eigenvalues");
+    }
+
+    char* eig_vecs_data = eig_vecs.get_data();
+    char* eig_vals_data = eig_vals.get_data();
 
     const std::int64_t n = eig_vecs_shape[0];
     const oneapi::mkl::job jobz_val = static_cast<oneapi::mkl::job>(jobz);
     const oneapi::mkl::uplo uplo_val = static_cast<oneapi::mkl::uplo>(upper_lower);
 
-    if (eig_vecs_typenum != eig_vals_typenum)
-    {
-        throw py::value_error("Types of eigenvectors and eigenvalues aare missmatched");
-    }
-    else if (eig_vecs_typenum == dpctl_capi.UAR_DOUBLE_)
-    {
-        double* a = reinterpret_cast<double*>(eig_vecs.get_data());
-        double* w = reinterpret_cast<double*>(eig_vals.get_data());
+    std::vector<sycl::event> host_task_events;
+    sycl::event syevd_ev =
+        syevd_fn(exec_q, jobz_val, uplo_val, n, eig_vecs_data, eig_vals_data, host_task_events, depends);
 
-        syevd_ev = call_syevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
-    }
-    else if (eig_vecs_typenum == dpctl_capi.UAR_FLOAT_)
-    {
-        float* a = reinterpret_cast<float*>(eig_vecs.get_data());
-        float* w = reinterpret_cast<float*>(eig_vals.get_data());
+    sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events);
+    return std::make_pair(args_ev, syevd_ev);
+}
 
-        syevd_ev = call_syevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
-    }
-    else
+template <typename fnT, typename T>
+struct SyevdContigFactory
+{
+    fnT get()
     {
-        throw py::value_error("Unexpected types with num=" + std::to_string(eig_vecs_typenum) +
-                              " for eigenvectors and eigenvalues");
+        if constexpr (types::SyevdTypePairSupportFactory<T>::is_defined)
+        {
+            return syevd_impl<T>;
+        }
+        else
+        {
+            return nullptr;
+        }
     }
+};
 
-    sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events);
-    return std::make_pair(args_ev, syevd_ev);
+void init_syevd_dispatch_vector(void)
+{
+    type_dispatch::DispatchVectorBuilder<syevd_impl_fn_ptr_t, SyevdContigFactory, type_dispatch::num_types> contig;
+    contig.populate_dispatch_vector(syevd_dispatch_vector);
 }
 }
 }
diff --git a/dpnp/backend/extensions/lapack/syevd.hpp b/dpnp/backend/extensions/lapack/syevd.hpp
index 14d167ec02a7..c5f0bc1b1531 100644
--- a/dpnp/backend/extensions/lapack/syevd.hpp
+++ b/dpnp/backend/extensions/lapack/syevd.hpp
@@ -45,6 +45,8 @@ namespace lapack
                                                      dpctl::tensor::usm_ndarray eig_vecs,
                                                      dpctl::tensor::usm_ndarray eig_vals,
                                                      const std::vector<sycl::event>& depends = {});
+
+    extern void init_syevd_dispatch_vector(void);
 }
 }
 }
diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp
new file mode 100644
index 000000000000..66dda89e891d
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/types_matrix.hpp
@@ -0,0 +1,88 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <type_traits>
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace lapack
+{
+namespace types
+{
+// TODO: to remove, will be provided by dpctl tensor headers
+template <typename Ty1, typename ArgTy1, typename Ty2 = Ty1, typename ArgTy2 = ArgTy1>
+struct TypePairEntry : std::bool_constant<std::conjunction_v<std::is_same<Ty1, ArgTy1>, std::is_same<Ty2, ArgTy2>>>
+{
+    static constexpr bool is_defined = true;
+};
+
+
+// TODO: to remove, will be provided by dpctl tensor headers
+struct NotFoundEntry : std::true_type
+{
+    static constexpr bool is_defined = false;
+};
+
+/**
+ * @brief A factory to define pairs of supported types for which
+ * MKL LAPACK library provides support in oneapi::mkl::lapack::heevd<T, RealT> function.
+ *
+ * @tparam T Type of array containing input matrix A and an output array with eigenvectors.
+ * @tparam RealT Type of output array containing eigenvalues of A.
+ */
+template <typename T, typename RealT>
+struct HeevdTypePairSupportFactory
+{
+    static constexpr bool is_defined = std::disjunction<TypePairEntry<T, std::complex<double>, RealT, double>,
+                                                        TypePairEntry<T, std::complex<float>, RealT, float>,
+                                                        // fall-through
+                                                        NotFoundEntry>::is_defined;
+};
+
+/**
+ * @brief A factory to define pairs of supported types for which
+ * MKL LAPACK library provides support in oneapi::mkl::lapack::syevd<T> function.
+ *
+ * @tparam T Type of array containing input matrix A and output arrays with eigenvectors and eigenvalues.
+ */
+template <typename T>
+struct SyevdTypePairSupportFactory
+{
+    static constexpr bool is_defined = std::disjunction<TypePairEntry<T, double>,
+                                                        TypePairEntry<T, float>,
+                                                        // fall-through
+                                                        NotFoundEntry>::is_defined;
+};
+}
+}
+}
+}
+}

From b90e391542fd68196736b67b405dcb869eab54a3 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 31 May 2023 04:54:55 -0500
Subject: [PATCH 106/129] Added dep on dpctl tensor headers and removed todo

---
 dpnp/backend/extensions/lapack/CMakeLists.txt |  2 +-
 .../extensions/lapack/types_matrix.hpp        | 26 +++++--------------
 2 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
index cfeb753168e9..dea062935355 100644
--- a/dpnp/backend/extensions/lapack/CMakeLists.txt
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -45,7 +45,7 @@ target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
 
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
-target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}/../tensor/libtensor/include)
+target_include_directories(${py_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
 
 if (WIN32)
   target_compile_options(${python_module_name} PRIVATE
diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp
index 66dda89e891d..ebfaf3cfb125 100644
--- a/dpnp/backend/extensions/lapack/types_matrix.hpp
+++ b/dpnp/backend/extensions/lapack/types_matrix.hpp
@@ -37,20 +37,6 @@ namespace lapack
 {
 namespace types
 {
-// TODO: to remove, will be provided by dpctl tensor headers
-template <typename Ty1, typename ArgTy1, typename Ty2 = Ty1, typename ArgTy2 = ArgTy1>
-struct TypePairEntry : std::bool_constant<std::conjunction_v<std::is_same<Ty1, ArgTy1>, std::is_same<Ty2, ArgTy2>>>
-{
-    static constexpr bool is_defined = true;
-};
-
-
-// TODO: to remove, will be provided by dpctl tensor headers
-struct NotFoundEntry : std::true_type
-{
-    static constexpr bool is_defined = false;
-};
-
 /**
  * @brief A factory to define pairs of supported types for which
  * MKL LAPACK library provides support in oneapi::mkl::lapack::heevd<T, RealT> function.
@@ -61,10 +47,10 @@ struct NotFoundEntry : std::true_type
 template <typename T, typename RealT>
 struct HeevdTypePairSupportFactory
 {
-    static constexpr bool is_defined = std::disjunction<TypePairEntry<T, std::complex<double>, RealT, double>,
-                                                        TypePairEntry<T, std::complex<float>, RealT, float>,
+    static constexpr bool is_defined = std::disjunction<TypePairDefinedEntry<T, std::complex<double>, RealT, double>,
+                                                        TypePairDefinedEntry<T, std::complex<float>, RealT, float>,
                                                         // fall-through
-                                                        NotFoundEntry>::is_defined;
+                                                        NotDefinedEntry>::is_defined;
 };
 
 /**
@@ -76,10 +62,10 @@ struct HeevdTypePairSupportFactory
 template <typename T>
 struct SyevdTypePairSupportFactory
 {
-    static constexpr bool is_defined = std::disjunction<TypePairEntry<T, double>,
-                                                        TypePairEntry<T, float>,
+    static constexpr bool is_defined = std::disjunction<TypePairDefinedEntry<T, double, T, double>,
+                                                        TypePairDefinedEntry<T, float, T, float>,
                                                         // fall-through
-                                                        NotFoundEntry>::is_defined;
+                                                        NotDefinedEntry>::is_defined;
 };
 }
 }

From 675fbc31239ef6b7501086cc2a8947fc43439f0e Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 31 May 2023 05:23:51 -0500
Subject: [PATCH 107/129] Resolved compilation errors

---
 dpnp/backend/extensions/lapack/CMakeLists.txt   |  2 +-
 dpnp/backend/extensions/lapack/heevd.cpp        |  8 +++-----
 dpnp/backend/extensions/lapack/syevd.cpp        |  8 +++-----
 dpnp/backend/extensions/lapack/types_matrix.hpp | 16 ++++++++++------
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
index dea062935355..e54de4068c01 100644
--- a/dpnp/backend/extensions/lapack/CMakeLists.txt
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -45,7 +45,7 @@ target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
 
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
-target_include_directories(${py_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
 
 if (WIN32)
   target_compile_options(${python_module_name} PRIVATE
diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
index bfb18697a56e..f99fb94c18ec 100644
--- a/dpnp/backend/extensions/lapack/heevd.cpp
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -28,7 +28,6 @@
 
 // dpctl tensor headers
 #include "utils/memory_overlap.hpp"
-#include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
 
 #include "heevd.hpp"
@@ -48,7 +47,6 @@ namespace lapack
 
 namespace mkl_lapack = oneapi::mkl::lapack;
 namespace py = pybind11;
-namespace type_dispatch = dpctl::tensor::type_dispatch;
 namespace type_utils = dpctl::tensor::type_utils;
 
 typedef sycl::event (*heevd_impl_fn_ptr_t)(sycl::queue,
@@ -60,7 +58,7 @@ typedef sycl::event (*heevd_impl_fn_ptr_t)(sycl::queue,
                                            std::vector<sycl::event>&,
                                            const std::vector<sycl::event>&);
 
-static heevd_impl_fn_ptr_t heevd_dispatch_table[type_dispatch::num_types][type_dispatch::num_types];
+static heevd_impl_fn_ptr_t heevd_dispatch_table[dpctl_td_ns::num_types][dpctl_td_ns::num_types];
 
 template <typename T, typename RealT>
 static sycl::event heevd_impl(sycl::queue exec_q,
@@ -202,7 +200,7 @@ std::pair<sycl::event, sycl::event> heevd(sycl::queue exec_q,
         throw py::value_error("An array with output eigenvalues must be C-contiguous");
     }
 
-    auto array_types = type_dispatch::usm_ndarray_types();
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
     int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
     int eig_vals_type_id = array_types.typenum_to_lookup_id(eig_vals.get_typenum());
 
@@ -245,7 +243,7 @@ struct HeevdContigFactory
 
 void init_heevd_dispatch_table(void)
 {
-    type_dispatch::DispatchTableBuilder<heevd_impl_fn_ptr_t, HeevdContigFactory, type_dispatch::num_types> contig;
+    dpctl_td_ns::DispatchTableBuilder<heevd_impl_fn_ptr_t, HeevdContigFactory, dpctl_td_ns::num_types> contig;
     contig.populate_dispatch_table(heevd_dispatch_table);
 }
 }
diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp
index 4e4ce0001a32..d03c2dff372c 100644
--- a/dpnp/backend/extensions/lapack/syevd.cpp
+++ b/dpnp/backend/extensions/lapack/syevd.cpp
@@ -28,7 +28,6 @@
 
 // dpctl tensor headers
 #include "utils/memory_overlap.hpp"
-#include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
 
 #include "syevd.hpp"
@@ -48,7 +47,6 @@ namespace lapack
 
 namespace mkl_lapack = oneapi::mkl::lapack;
 namespace py = pybind11;
-namespace type_dispatch = dpctl::tensor::type_dispatch;
 namespace type_utils = dpctl::tensor::type_utils;
 
 typedef sycl::event (*syevd_impl_fn_ptr_t)(sycl::queue,
@@ -60,7 +58,7 @@ typedef sycl::event (*syevd_impl_fn_ptr_t)(sycl::queue,
                                            std::vector<sycl::event>&,
                                            const std::vector<sycl::event>&);
 
-static syevd_impl_fn_ptr_t syevd_dispatch_vector[type_dispatch::num_types];
+static syevd_impl_fn_ptr_t syevd_dispatch_vector[dpctl_td_ns::num_types];
 
 template <typename T>
 static sycl::event syevd_impl(sycl::queue exec_q,
@@ -201,7 +199,7 @@ std::pair<sycl::event, sycl::event> syevd(sycl::queue exec_q,
         throw py::value_error("An array with output eigenvalues must be C-contiguous");
     }
 
-    auto array_types = type_dispatch::usm_ndarray_types();
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
     int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
     int eig_vals_type_id = array_types.typenum_to_lookup_id(eig_vals.get_typenum());
 
@@ -249,7 +247,7 @@ struct SyevdContigFactory
 
 void init_syevd_dispatch_vector(void)
 {
-    type_dispatch::DispatchVectorBuilder<syevd_impl_fn_ptr_t, SyevdContigFactory, type_dispatch::num_types> contig;
+    dpctl_td_ns::DispatchVectorBuilder<syevd_impl_fn_ptr_t, SyevdContigFactory, dpctl_td_ns::num_types> contig;
     contig.populate_dispatch_vector(syevd_dispatch_vector);
 }
 }
diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp
index ebfaf3cfb125..a56ca4926256 100644
--- a/dpnp/backend/extensions/lapack/types_matrix.hpp
+++ b/dpnp/backend/extensions/lapack/types_matrix.hpp
@@ -27,6 +27,10 @@
 
 #include <type_traits>
 
+#include "utils/type_dispatch.hpp"
+
+namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
+
 namespace dpnp
 {
 namespace backend
@@ -47,10 +51,10 @@ namespace types
 template <typename T, typename RealT>
 struct HeevdTypePairSupportFactory
 {
-    static constexpr bool is_defined = std::disjunction<TypePairDefinedEntry<T, std::complex<double>, RealT, double>,
-                                                        TypePairDefinedEntry<T, std::complex<float>, RealT, float>,
+    static constexpr bool is_defined = std::disjunction<dpctl_td_ns::TypePairDefinedEntry<T, std::complex<double>, RealT, double>,
+                                                        dpctl_td_ns::TypePairDefinedEntry<T, std::complex<float>, RealT, float>,
                                                         // fall-through
-                                                        NotDefinedEntry>::is_defined;
+                                                        dpctl_td_ns::NotDefinedEntry>::is_defined;
 };
 
 /**
@@ -62,10 +66,10 @@ struct HeevdTypePairSupportFactory
 template <typename T>
 struct SyevdTypePairSupportFactory
 {
-    static constexpr bool is_defined = std::disjunction<TypePairDefinedEntry<T, double, T, double>,
-                                                        TypePairDefinedEntry<T, float, T, float>,
+    static constexpr bool is_defined = std::disjunction<dpctl_td_ns::TypePairDefinedEntry<T, double, T, double>,
+                                                        dpctl_td_ns::TypePairDefinedEntry<T, float, T, float>,
                                                         // fall-through
-                                                        NotDefinedEntry>::is_defined;
+                                                        dpctl_td_ns::NotDefinedEntry>::is_defined;
 };
 }
 }

From dfca7330d8496b59df1f0b1134c1c0c81c78d86e Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 31 May 2023 08:51:37 -0500
Subject: [PATCH 108/129] Fix coverage action

---
 .github/workflows/generate_coverage.yaml        | 3 +--
 dpnp/backend/extensions/lapack/CMakeLists.txt   | 3 +++
 dpnp/backend/extensions/lapack/types_matrix.hpp | 2 ++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 4d054274502f..f60a8cbc2eaa 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -34,7 +34,6 @@ jobs:
           python-version: ${{ env.python-ver }}
           miniconda-version: 'latest'
           activate-environment: 'coverage'
-          channels: intel, conda-forge
 
       - name: Install Lcov
         run: |
@@ -42,7 +41,7 @@ jobs:
       - name: Install dpnp dependencies
         run: |
           conda install cython llvm cmake scikit-build ninja pytest pytest-cov coverage[toml] \
-              dppy/label/dev::dpctl dpcpp_linux-64  mkl-devel-dpcpp  tbb-devel  onedpl-devel
+              dpctl dpcpp_linux-64 sysroot_linux-64">=2.28" mkl-devel-dpcpp tbb-devel onedpl-devel -c dppy/label/dev -c intel -c conda-forge --override-channels
       - name: Conda info
         run: |
           conda info
diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
index e54de4068c01..c104c15e831b 100644
--- a/dpnp/backend/extensions/lapack/CMakeLists.txt
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -44,6 +44,9 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
 
+message(STATUS "Dpctl_INCLUDE_DIRS=" ${Dpctl_INCLUDE_DIRS})
+message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})
+
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
 
diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp
index a56ca4926256..4175873b541f 100644
--- a/dpnp/backend/extensions/lapack/types_matrix.hpp
+++ b/dpnp/backend/extensions/lapack/types_matrix.hpp
@@ -27,8 +27,10 @@
 
 #include <type_traits>
 
+// dpctl tensor headers
 #include "utils/type_dispatch.hpp"
 
+// dpctl namespace for operations with types
 namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
 
 namespace dpnp

From 74153fb589fc520646855b25b83a3cd460a48d58 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 31 May 2023 09:30:49 -0500
Subject: [PATCH 109/129] Fix sphinx build

---
 .github/workflows/build-sphinx.yml       | 5 +++--
 .github/workflows/generate_coverage.yaml | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml
index f6664f66c1f0..f4e3b74c6237 100644
--- a/.github/workflows/build-sphinx.yml
+++ b/.github/workflows/build-sphinx.yml
@@ -17,6 +17,7 @@ jobs:
 
     env:
       python-ver: '3.9'
+      CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels'
 
     steps:
       - name: Cancel Previous Runs
@@ -74,10 +75,10 @@ jobs:
       - name: Install dpnp dependencies
         run: |
           conda install dpctl mkl-devel-dpcpp onedpl-devel tbb-devel dpcpp_linux-64 \
-              cmake cython pytest ninja scikit-build -c dppy/label/dev -c intel -c conda-forge
+              cmake cython pytest ninja scikit-build sysroot_linux-64">=2.28" ${{ env.CHANNELS }}
 
       - name: Install cuPy dependencies
-        run: conda install -c conda-forge cupy cudatoolkit=10.0
+        run: conda install cupy cudatoolkit=10.0
 
       - name: Conda info
         run: conda info
diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index f60a8cbc2eaa..fd38dde9a5d3 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -15,6 +15,7 @@ jobs:
 
     env:
       python-ver: '3.10'
+      CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels'
 
     steps:
       - name: Cancel Previous Runs
@@ -41,7 +42,7 @@ jobs:
       - name: Install dpnp dependencies
         run: |
           conda install cython llvm cmake scikit-build ninja pytest pytest-cov coverage[toml] \
-              dpctl dpcpp_linux-64 sysroot_linux-64">=2.28" mkl-devel-dpcpp tbb-devel onedpl-devel -c dppy/label/dev -c intel -c conda-forge --override-channels
+              dpctl dpcpp_linux-64 sysroot_linux-64">=2.28" mkl-devel-dpcpp tbb-devel onedpl-devel ${{ env.CHANNELS }}
       - name: Conda info
         run: |
           conda info

From 8e2c5f7978f42235331e207cea03910f7dbfa962 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 31 May 2023 09:42:50 -0500
Subject: [PATCH 110/129] Added print of dpctl includes

---
 CMakeLists.txt                                | 3 +++
 dpnp/backend/extensions/lapack/CMakeLists.txt | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cdecc3cefd72..efa35ac50869 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,6 +57,9 @@ set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"")
 find_package(Cython REQUIRED)
 find_package(Dpctl REQUIRED)
 
+message(STATUS "Dpctl_INCLUDE_DIRS=" ${Dpctl_INCLUDE_DIRS})
+message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})
+
 if(WIN32)
     string(CONCAT WARNING_FLAGS
         "-Wall "
diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
index c104c15e831b..e54de4068c01 100644
--- a/dpnp/backend/extensions/lapack/CMakeLists.txt
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -44,9 +44,6 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
 
-message(STATUS "Dpctl_INCLUDE_DIRS=" ${Dpctl_INCLUDE_DIRS})
-message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})
-
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
 

From 4e4d231b3da17920884e847066148310cb1d7c42 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 19 Apr 2023 11:43:03 +0200
Subject: [PATCH 111/129] reuse dpctl.tensor.moveaxis for dpnp.moveaxis

---
 dpnp/dpnp_iface_manipulation.py               | 41 +++++++------------
 tests/skipped_tests.tbl                       |  2 -
 tests/skipped_tests_gpu.tbl                   |  2 -
 .../cupy/manipulation_tests/test_transpose.py |  7 ----
 4 files changed, 15 insertions(+), 37 deletions(-)

diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 567661bdb57f..02de340e06d6 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -388,17 +388,25 @@ def hstack(tup):
     return call_origin(numpy.hstack, tup_new)
 
 
-def moveaxis(x1, source, destination):
+def moveaxis(x, source, destination):
     """
     Move axes of an array to new positions. Other axes remain in their original order.
 
     For full documentation refer to :obj:`numpy.moveaxis`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Array with moved axes.
+        The returned array must has the same data type as `x`,
+        is created on the same device as `x` and has the same
+        USM allocation type as `x`.
+
     Limitations
     -----------
-    Input array ``x1`` is supported as :obj:`dpnp.ndarray`.
+    Parameters `x` is supported as either :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`.
     Otherwise the function will be executed sequentially on CPU.
-    Sizes of normalized input arrays are supported to be equal.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     See Also
@@ -417,30 +425,11 @@ def moveaxis(x1, source, destination):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    if x1_desc:
-        source_norm = normalize_axis(source, x1_desc.ndim)
-        destination_norm = normalize_axis(destination, x1_desc.ndim)
-
-        if len(source_norm) != len(destination_norm):
-            pass
-        else:
-            # 'do nothing' pattern for transpose() with no elements in 'source'
-            input_permute = []
-            for i in range(x1_desc.ndim):
-                if i not in source_norm:
-                    input_permute.append(i)
-
-            # insert moving axes into proper positions
-            for destination_id, source_id in sorted(zip(destination_norm, source_norm)):
-                # if destination_id in input_permute:
-                # pytest tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3
-                # checker_throw_value_error("swapaxes", "source_id exists", source_id, input_permute)
-                input_permute.insert(destination_id, source_id)
-
-            return transpose(x1_desc.get_pyobj(), axes=input_permute)
+    if isinstance(x, dpnp_array) or isinstance(x, dpt.usm_ndarray):
+        dpt_array = x.get_array() if isinstance(x, dpnp_array) else x
+        return dpnp_array._create_from_usm_ndarray(dpt.moveaxis(dpt_array, source, destination))
 
-    return call_origin(numpy.moveaxis, x1, source, destination)
+    return call_origin(numpy.moveaxis, x, source, destination)
 
 
 def ravel(x1, order='C'):
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 68bc5c8bc1dc..d99fa8ac74cc 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -750,8 +750,6 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_2_{reps
 tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_3_{reps=(0, 1)}::test_array_tile
 tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps=(2, 3)}::test_array_tile
 tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile
-tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2
-tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3],       [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_457_{arg1=array([[1, 2, 3],       [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int32), dtype=float64, name='fmod', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_459_{arg1=array([[1, 2, 3],       [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2],       [3, 4, 5]], dtype=int32), dtype=float64, name='remainder', use_dtype=False}::test_binary
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index b99492832ff9..87a0ff9d43b7 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -893,8 +893,6 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_2_{reps
 tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_3_{reps=(0, 1)}::test_array_tile
 tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps=(2, 3)}::test_array_tile
 tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile
-tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2
-tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3
 
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_279_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.],       [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_287_{arg1=array([[1., 2., 3.],       [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.],       [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary
diff --git a/tests/third_party/cupy/manipulation_tests/test_transpose.py b/tests/third_party/cupy/manipulation_tests/test_transpose.py
index ed06f050f777..91e8fe9a2cdb 100644
--- a/tests/third_party/cupy/manipulation_tests/test_transpose.py
+++ b/tests/third_party/cupy/manipulation_tests/test_transpose.py
@@ -41,14 +41,12 @@ def test_moveaxis6(self, xp):
         return xp.moveaxis(a, [0, 2, 1], [3, 4, 0])
 
     # dim is too large
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid1_1(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
             with pytest.raises(numpy.AxisError):
                 xp.moveaxis(a, [0, 1], [1, 3])
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid1_2(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -56,14 +54,12 @@ def test_moveaxis_invalid1_2(self):
                 xp.moveaxis(a, [0, 1], [1, 3])
 
     # dim is too small
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid2_1(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
             with pytest.raises(numpy.AxisError):
                 xp.moveaxis(a, [0, -4], [1, 2])
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid2_2(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -71,7 +67,6 @@ def test_moveaxis_invalid2_2(self):
                 xp.moveaxis(a, [0, -4], [1, 2])
 
     # len(source) != len(destination)
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid3(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -79,7 +74,6 @@ def test_moveaxis_invalid3(self):
                 xp.moveaxis(a, [0, 1, 2], [1, 2])
 
     # len(source) != len(destination)
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid4(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)
@@ -87,7 +81,6 @@ def test_moveaxis_invalid4(self):
                 xp.moveaxis(a, [0, 1], [1, 2, 0])
 
     # Use the same axis twice
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_moveaxis_invalid5_1(self):
         for xp in (numpy, cupy):
             a = testing.shaped_arange((2, 3, 4), xp)

From ac5d887e5951459a6f36d277c1a6810eb2d11ee5 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 27 Apr 2023 13:49:51 +0200
Subject: [PATCH 112/129] Fix remark

---
 dpnp/dpnp_iface_manipulation.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 02de340e06d6..60c1c5e79ef1 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -398,9 +398,8 @@ def moveaxis(x, source, destination):
     -------
     out : dpnp.ndarray
         Array with moved axes.
-        The returned array must has the same data type as `x`,
-        is created on the same device as `x` and has the same
-        USM allocation type as `x`.
+        The returned array will have the same data and
+        the same USM allocation type as `x`.
 
     Limitations
     -----------

From 2cd7600772c9488f630635ef754a75aa57fb9ab4 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 17 Apr 2023 22:40:21 +0200
Subject: [PATCH 113/129] Reuse dpctl.tensor.squeeze for dpnp.squeeze

---
 dpnp/dpnp_algo/dpnp_algo_manipulation.pxi | 21 -------------
 dpnp/dpnp_iface_manipulation.py           | 37 ++++++++++++++---------
 tests/skipped_tests.tbl                   |  9 ------
 tests/skipped_tests_gpu.tbl               |  9 ------
 4 files changed, 22 insertions(+), 54 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi b/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi
index 3e27af363c3c..b9234dbe5ab2 100644
--- a/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi
@@ -43,7 +43,6 @@ __all__ += [
     "dpnp_repeat",
     "dpnp_reshape",
     "dpnp_transpose",
-    "dpnp_squeeze",
 ]
 
 
@@ -294,23 +293,3 @@ cpdef utils.dpnp_descriptor dpnp_transpose(utils.dpnp_descriptor array1, axes=No
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
-
-
-cpdef utils.dpnp_descriptor dpnp_squeeze(utils.dpnp_descriptor in_array, axis):
-    cdef shape_type_c shape_list
-    if axis is None:
-        for i in range(in_array.ndim):
-            if in_array.shape[i] != 1:
-                shape_list.push_back(in_array.shape[i])
-    else:
-        axis_norm = utils._object_to_tuple(utils.normalize_axis(utils._object_to_tuple(axis), in_array.ndim))
-        for i in range(in_array.ndim):
-            if i in axis_norm:
-                if in_array.shape[i] != 1:
-                    utils.checker_throw_value_error("dpnp_squeeze", "axis", axis, "axis has size not equal to one")
-            else:
-                shape_list.push_back(in_array.shape[i])
-
-    in_array_obj = in_array.get_array()
-
-    return dpnp_reshape(dpnp_copy(in_array), shape_list)
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 60c1c5e79ef1..98d7abfe1c09 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -571,12 +571,28 @@ def rollaxis(x1, axis, start=0):
     return call_origin(numpy.rollaxis, x1, axis, start)
 
 
-def squeeze(x1, axis=None):
+def squeeze(x, axis=None):
     """
     Remove single-dimensional entries from the shape of an array.
 
     For full documentation refer to :obj:`numpy.squeeze`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        Output array is a view, if possible,
+        and a copy otherwise, but with all or a subset of the
+        dimensions of length 1 removed. Output has the same data
+        type as the input, is allocated on the same device as the
+        input and has the same USM allocation type as the input
+        array `x`.
+
+    Limitations
+    -----------
+    Parameters `x` is supported as either :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`.
+    Otherwise the function will be executed sequentially on CPU.
+
     Examples
     --------
     >>> import dpnp as np
@@ -590,26 +606,17 @@ def squeeze(x1, axis=None):
     >>> np.squeeze(x, axis=1).shape
     Traceback (most recent call last):
     ...
-    ValueError: cannot select an axis to squeeze out which has size not equal to one
+    ValueError: Cannot select an axis to squeeze out which has size not equal to one
     >>> np.squeeze(x, axis=2).shape
     (1, 3)
-    >>> x = np.array([[1234]])
-    >>> x.shape
-    (1, 1)
-    >>> np.squeeze(x)
-    array(1234)  # 0d array
-    >>> np.squeeze(x).shape
-    ()
-    >>> np.squeeze(x)[()]
-    1234
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    if x1_desc:
-        return dpnp_squeeze(x1_desc, axis).get_pyobj()
+    if isinstance(x, dpnp_array) or isinstance(x, dpt.usm_ndarray):
+        dpt_array = x.get_array() if isinstance(x, dpnp_array) else x
+        return dpnp_array._create_from_usm_ndarray(dpt.squeeze(dpt_array, axis))
 
-    return call_origin(numpy.squeeze, x1, axis)
+    return call_origin(numpy.squeeze, x, axis)
 
 
 def stack(arrays, axis=0, out=None):
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index d99fa8ac74cc..d4d77828b61a 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -670,15 +670,6 @@ tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{s
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
 
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure1
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure2
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure1
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure2
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure3
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure4
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure1
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure2
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure3
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_0_{shapes=[(3,), (2,)]}::test_invalid_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_0_{shapes=[(3,), (2,)]}::test_invalid_broadcast_arrays
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_1_{shapes=[(3, 2), (2, 3)]}::test_invalid_broadcast
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 87a0ff9d43b7..f18d39cd9f48 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -813,15 +813,6 @@ tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{s
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays
 
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure1
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure2
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure1
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure2
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure3
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure4
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure1
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure2
-tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure3
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_0_{shapes=[(3,), (2,)]}::test_invalid_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_0_{shapes=[(3,), (2,)]}::test_invalid_broadcast_arrays
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_1_{shapes=[(3, 2), (2, 3)]}::test_invalid_broadcast

From 67e5ce1e5f8ed3d2ce5a2a84c74b9ef9dd9795f0 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 27 Apr 2023 13:45:11 +0200
Subject: [PATCH 114/129] Fix remarks

---
 dpnp/dpnp_iface_manipulation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 98d7abfe1c09..b317a0a9a11a 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -571,9 +571,9 @@ def rollaxis(x1, axis, start=0):
     return call_origin(numpy.rollaxis, x1, axis, start)
 
 
-def squeeze(x, axis=None):
+def squeeze(x, /, axis=None):
     """
-    Remove single-dimensional entries from the shape of an array.
+    Removes singleton dimensions (axes) from array `x`.
 
     For full documentation refer to :obj:`numpy.squeeze`.
 
@@ -606,7 +606,7 @@ def squeeze(x, axis=None):
     >>> np.squeeze(x, axis=1).shape
     Traceback (most recent call last):
     ...
-    ValueError: Cannot select an axis to squeeze out which has size not equal to one
+    ValueError: Cannot select an axis to squeeze out which has size not equal to one.
     >>> np.squeeze(x, axis=2).shape
     (1, 3)
 

From 3b3fecfd07ffe6689ddeb662123c557af88cfb20 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 14 Apr 2023 16:46:28 +0200
Subject: [PATCH 115/129] Remove dpnp.where implementation

---
 dpnp/backend/include/dpnp_iface.hpp          |  51 ----
 dpnp/backend/include/dpnp_iface_fptr.hpp     |   1 -
 dpnp/backend/kernels/dpnp_krnl_searching.cpp | 255 -------------------
 dpnp/dpnp_algo/dpnp_algo.pxd                 |   2 -
 dpnp/dpnp_algo/dpnp_algo_searching.pxi       | 102 --------
 5 files changed, 411 deletions(-)

diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp
index 348dd8e7bff4..7a80b40a3d2e 100644
--- a/dpnp/backend/include/dpnp_iface.hpp
+++ b/dpnp/backend/include/dpnp_iface.hpp
@@ -1683,57 +1683,6 @@ INP_DLLEXPORT void dpnp_var_c(void* array,
                               size_t naxis,
                               size_t ddof);
 
-/**
- * @ingroup BACKEND_API
- * @brief Implementation of where function
- *
- * @param [in]  q_ref               Reference to SYCL queue.
- * @param [out] result_out          Output array.
- * @param [in]  result_size         Size of output array.
- * @param [in]  result_ndim         Number of output array dimensions.
- * @param [in]  result_shape        Shape of output array.
- * @param [in]  result_strides      Strides of output array.
- * @param [in]  condition_in        Condition array.
- * @param [in]  condition_size      Size of condition array.
- * @param [in]  condition_ndim      Number of condition array dimensions.
- * @param [in]  condition_shape     Shape of condition array.
- * @param [in]  condition_strides   Strides of condition array.
- * @param [in]  input1_in           First input array.
- * @param [in]  input1_size         Size of first input array.
- * @param [in]  input1_ndim         Number of first input array dimensions.
- * @param [in]  input1_shape        Shape of first input array.
- * @param [in]  input1_strides      Strides of first input array.
- * @param [in]  input2_in           Second input array.
- * @param [in]  input2_size         Size of second input array.
- * @param [in]  input2_ndim         Number of second input array dimensions.
- * @param [in]  input2_shape        Shape of second input array.
- * @param [in]  input2_strides      Strides of second input array.
- * @param [in]  dep_event_vec_ref   Reference to vector of SYCL events.
- */
-template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
-INP_DLLEXPORT DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref,
-                                             void* result_out,
-                                             const size_t result_size,
-                                             const size_t result_ndim,
-                                             const shape_elem_type* result_shape,
-                                             const shape_elem_type* result_strides,
-                                             const void* condition_in,
-                                             const size_t condition_size,
-                                             const size_t condition_ndim,
-                                             const shape_elem_type* condition_shape,
-                                             const shape_elem_type* condition_strides,
-                                             const void* input1_in,
-                                             const size_t input1_size,
-                                             const size_t input1_ndim,
-                                             const shape_elem_type* input1_shape,
-                                             const shape_elem_type* input1_strides,
-                                             const void* input2_in,
-                                             const size_t input2_size,
-                                             const size_t input2_ndim,
-                                             const shape_elem_type* input2_shape,
-                                             const shape_elem_type* input2_strides,
-                                             const DPCTLEventVectorRef dep_event_vec_ref);
-
 /**
  * @ingroup BACKEND_API
  * @brief Implementation of invert function
diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
index 653471fd1b50..3a0dc7d0a526 100644
--- a/dpnp/backend/include/dpnp_iface_fptr.hpp
+++ b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -375,7 +375,6 @@ enum class DPNPFuncName : size_t
     DPNP_FN_VANDER_EXT,                   /**< Used in numpy.vander() impl, requires extra parameters */
     DPNP_FN_VAR,                          /**< Used in numpy.var() impl  */
     DPNP_FN_VAR_EXT,                      /**< Used in numpy.var() impl, requires extra parameters */
-    DPNP_FN_WHERE_EXT,                    /**< Used in numpy.where() impl, requires extra parameters */
     DPNP_FN_ZEROS,                        /**< Used in numpy.zeros() impl */
     DPNP_FN_ZEROS_LIKE,                   /**< Used in numpy.zeros_like() impl */
     DPNP_FN_LAST,                         /**< The latest element of the enumeration */
diff --git a/dpnp/backend/kernels/dpnp_krnl_searching.cpp b/dpnp/backend/kernels/dpnp_krnl_searching.cpp
index 471d524643f5..9bbb35068134 100644
--- a/dpnp/backend/kernels/dpnp_krnl_searching.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_searching.cpp
@@ -27,7 +27,6 @@
 
 #include <dpnp_iface.hpp>
 #include "dpnp_fptr.hpp"
-#include "dpnp_iterator.hpp"
 #include "dpnpc_memory_adapter.hpp"
 #include "queue_sycl.hpp"
 
@@ -140,258 +139,6 @@ DPCTLSyclEventRef (*dpnp_argmin_ext_c)(DPCTLSyclQueueRef,
                                        size_t,
                                        const DPCTLEventVectorRef) = dpnp_argmin_c<_DataType, _idx_DataType>;
 
-
-template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
-class dpnp_where_c_broadcast_kernel;
-
-template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
-class dpnp_where_c_strides_kernel;
-
-template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
-class dpnp_where_c_kernel;
-
-template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
-DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref,
-                               void* result_out,
-                               const size_t result_size,
-                               const size_t result_ndim,
-                               const shape_elem_type* result_shape,
-                               const shape_elem_type* result_strides,
-                               const void* condition_in,
-                               const size_t condition_size,
-                               const size_t condition_ndim,
-                               const shape_elem_type* condition_shape,
-                               const shape_elem_type* condition_strides,
-                               const void* input1_in,
-                               const size_t input1_size,
-                               const size_t input1_ndim,
-                               const shape_elem_type* input1_shape,
-                               const shape_elem_type* input1_strides,
-                               const void* input2_in,
-                               const size_t input2_size,
-                               const size_t input2_ndim,
-                               const shape_elem_type* input2_shape,
-                               const shape_elem_type* input2_strides,
-                               const DPCTLEventVectorRef dep_event_vec_ref)
-{
-    /* avoid warning unused variable*/
-    (void)dep_event_vec_ref;
-
-    DPCTLSyclEventRef event_ref = nullptr;
-
-    if (!condition_size || !input1_size || !input2_size)
-    {
-        return event_ref;
-    }
-
-    sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
-
-    bool* condition_data = static_cast<bool*>(const_cast<void*>(condition_in));
-    _DataType_input1* input1_data = static_cast<_DataType_input1*>(const_cast<void*>(input1_in));
-    _DataType_input2* input2_data = static_cast<_DataType_input2*>(const_cast<void*>(input2_in));
-    _DataType_output* result = static_cast<_DataType_output*>(result_out);
-
-    bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim);
-    use_broadcasting = use_broadcasting || !array_equal(condition_shape, condition_ndim, input1_shape, input1_ndim);
-    use_broadcasting = use_broadcasting || !array_equal(condition_shape, condition_ndim, input2_shape, input2_ndim);
-
-    shape_elem_type* condition_shape_offsets = new shape_elem_type[condition_ndim];
-
-    get_shape_offsets_inkernel(condition_shape, condition_ndim, condition_shape_offsets);
-    bool use_strides = !array_equal(condition_strides, condition_ndim, condition_shape_offsets, condition_ndim);
-    delete[] condition_shape_offsets;
-
-    shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];
-
-    get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);
-    use_strides = use_strides || !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);
-    delete[] input1_shape_offsets;
-
-    shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim];
-
-    get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets);
-    use_strides = use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim);
-    delete[] input2_shape_offsets;
-
-    sycl::event event;
-    sycl::range<1> gws(result_size);
-
-    if (use_broadcasting)
-    {
-        DPNPC_id<bool>* condition_it;
-        const size_t condition_it_it_size_in_bytes = sizeof(DPNPC_id<bool>);
-        condition_it = reinterpret_cast<DPNPC_id<bool>*>(dpnp_memory_alloc_c(q_ref, condition_it_it_size_in_bytes));
-        new (condition_it) DPNPC_id<bool>(q_ref, condition_data, condition_shape, condition_strides, condition_ndim);
-
-        condition_it->broadcast_to_shape(result_shape, result_ndim);
-
-        DPNPC_id<_DataType_input1>* input1_it;
-        const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>);
-        input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes));
-        new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim);
-
-        input1_it->broadcast_to_shape(result_shape, result_ndim);
-
-        DPNPC_id<_DataType_input2>* input2_it;
-        const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>);
-        input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes));
-        new (input2_it) DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim);
-
-        input2_it->broadcast_to_shape(result_shape, result_ndim);
-
-        auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
-            const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
-            {
-                const bool condition = (*condition_it)[i];
-                const _DataType_output input1_elem = (*input1_it)[i];
-                const _DataType_output input2_elem = (*input2_it)[i];
-                result[i] = (condition) ? input1_elem : input2_elem;
-            }
-        };
-        auto kernel_func = [&](sycl::handler& cgh) {
-            cgh.parallel_for<class dpnp_where_c_broadcast_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(
-                gws, kernel_parallel_for_func);
-        };
-
-        q.submit(kernel_func).wait();
-
-        condition_it->~DPNPC_id();
-        input1_it->~DPNPC_id();
-        input2_it->~DPNPC_id();
-
-        return event_ref;
-    }
-    else if (use_strides)
-    {
-        if ((result_ndim != condition_ndim) || (result_ndim != input1_ndim) || (result_ndim != input2_ndim))
-        {
-            throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +
-                                     " mismatches with either condition ndim=" + std::to_string(condition_ndim) +
-                                     " or input1 ndim=" + std::to_string(input1_ndim) +
-                                     " or input2 ndim=" + std::to_string(input2_ndim));
-        }
-
-        /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */
-        using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;
-
-        size_t strides_size = 4 * result_ndim;
-        shape_elem_type* dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);
-
-        /* create host temporary for packed strides managed by shared pointer */
-        auto strides_host_packed =
-            std::vector<shape_elem_type, usm_host_allocatorT>(strides_size, usm_host_allocatorT(q));
-
-        /* packed vector is concatenation of result_strides, condition_strides, input1_strides and input2_strides */
-        std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());
-        std::copy(condition_strides, condition_strides + result_ndim, strides_host_packed.begin() + result_ndim);
-        std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim);
-        std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 3 * result_ndim);
-
-        auto copy_strides_ev =
-            q.copy<shape_elem_type>(strides_host_packed.data(), dev_strides_data, strides_host_packed.size());
-
-        auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
-            const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
-            {
-                const shape_elem_type* result_strides_data = &dev_strides_data[0];
-                const shape_elem_type* condition_strides_data = &dev_strides_data[result_ndim];
-                const shape_elem_type* input1_strides_data = &dev_strides_data[2 * result_ndim];
-                const shape_elem_type* input2_strides_data = &dev_strides_data[3 * result_ndim];
-
-                size_t condition_id = 0;
-                size_t input1_id = 0;
-                size_t input2_id = 0;
-
-                for (size_t i = 0; i < result_ndim; ++i)
-                {
-                    const size_t output_xyz_id =
-                        get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i);
-                    condition_id += output_xyz_id * condition_strides_data[i];
-                    input1_id    += output_xyz_id * input1_strides_data[i];
-                    input2_id    += output_xyz_id * input2_strides_data[i];
-                }
-
-                const bool condition = condition_data[condition_id];
-                const _DataType_output input1_elem = input1_data[input1_id];
-                const _DataType_output input2_elem = input2_data[input2_id];
-                result[output_id] = (condition) ? input1_elem : input2_elem;
-            }
-        };
-        auto kernel_func = [&](sycl::handler& cgh) {
-            cgh.depends_on(copy_strides_ev);
-            cgh.parallel_for<class dpnp_where_c_strides_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(
-                gws, kernel_parallel_for_func);
-        };
-
-        q.submit(kernel_func).wait();
-
-        sycl::free(dev_strides_data, q);
-        return event_ref;
-    }
-    else
-    {
-        auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
-            const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
-
-            const bool condition = condition_data[i];
-            const _DataType_output input1_elem = input1_data[i];
-            const _DataType_output input2_elem = input2_data[i];
-            result[i] = (condition) ? input1_elem : input2_elem;
-        };
-        auto kernel_func = [&](sycl::handler& cgh) {
-            cgh.parallel_for<class dpnp_where_c_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(
-                gws, kernel_parallel_for_func);
-        };
-        event = q.submit(kernel_func);
-    }
-
-    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
-    return DPCTLEvent_Copy(event_ref);
-
-    return event_ref;
-}
-
-template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
-DPCTLSyclEventRef (*dpnp_where_ext_c)(DPCTLSyclQueueRef,
-                                      void*,
-                                      const size_t,
-                                      const size_t,
-                                      const shape_elem_type*,
-                                      const shape_elem_type*,
-                                      const void*,
-                                      const size_t,
-                                      const size_t,
-                                      const shape_elem_type*,
-                                      const shape_elem_type*,
-                                      const void*,
-                                      const size_t,
-                                      const size_t,
-                                      const shape_elem_type*,
-                                      const shape_elem_type*,
-                                      const void*,
-                                      const size_t,
-                                      const size_t,
-                                      const shape_elem_type*,
-                                      const shape_elem_type*,
-                                      const DPCTLEventVectorRef) = dpnp_where_c<_DataType_output, _DataType_input1, _DataType_input2>;
-
-template <DPNPFuncType FT1, DPNPFuncType... FTs>
-static void func_map_searching_2arg_3type_core(func_map_t& fmap)
-{
-    ((fmap[DPNPFuncName::DPNP_FN_WHERE_EXT][FT1][FTs] =
-          {populate_func_types<FT1, FTs>(),
-           (void*)dpnp_where_ext_c<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
-                                   func_type_map_t::find_type<FT1>,
-                                   func_type_map_t::find_type<FTs>>}),
-     ...);
-}
-
-template <DPNPFuncType... FTs>
-static void func_map_searching_2arg_3type_helper(func_map_t& fmap)
-{
-    ((func_map_searching_2arg_3type_core<FTs, FTs...>(fmap)), ...);
-}
-
 void func_map_init_searching(func_map_t& fmap)
 {
     fmap[DPNPFuncName::DPNP_FN_ARGMAX][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_argmax_default_c<int32_t, int32_t>};
@@ -430,7 +177,5 @@ void func_map_init_searching(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_ARGMIN_EXT][eft_DBL][eft_INT] = {eft_INT, (void*)dpnp_argmin_ext_c<double, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_ARGMIN_EXT][eft_DBL][eft_LNG] = {eft_LNG, (void*)dpnp_argmin_ext_c<double, int64_t>};
 
-    func_map_searching_2arg_3type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL, eft_C64, eft_C128>(fmap);
-
     return;
 }
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index 09af5667f8c4..56195613a338 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -354,7 +354,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_VANDER_EXT
         DPNP_FN_VAR
         DPNP_FN_VAR_EXT
-        DPNP_FN_WHERE_EXT
         DPNP_FN_ZEROS
         DPNP_FN_ZEROS_LIKE
 
@@ -577,7 +576,6 @@ Searching functions
 """
 cpdef dpnp_descriptor dpnp_argmax(dpnp_descriptor array1)
 cpdef dpnp_descriptor dpnp_argmin(dpnp_descriptor array1)
-cpdef dpnp_descriptor dpnp_where(dpnp_descriptor cond_obj, dpnp_descriptor x_obj, dpnp_descriptor y_obj)
 
 """
 Trigonometric functions
diff --git a/dpnp/dpnp_algo/dpnp_algo_searching.pxi b/dpnp/dpnp_algo/dpnp_algo_searching.pxi
index 07c7dec5f657..55d2ff6fe440 100644
--- a/dpnp/dpnp_algo/dpnp_algo_searching.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_searching.pxi
@@ -38,7 +38,6 @@ and the rest of the library
 __all__ += [
     "dpnp_argmax",
     "dpnp_argmin",
-    "dpnp_where"
 ]
 
 
@@ -47,29 +46,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_search_1in_1out_func_ptr_t)(c_dpctl.D
                                                                        void * , void * , size_t,
                                                                        const c_dpctl.DPCTLEventVectorRef)
 
-ctypedef c_dpctl.DPCTLSyclEventRef(*where_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
-                                                      void *,
-                                                      const size_t,
-                                                      const size_t,
-                                                      const shape_elem_type * ,
-                                                      const shape_elem_type * ,
-                                                      void *,
-                                                      const size_t,
-                                                      const size_t,
-                                                      const shape_elem_type * ,
-                                                      const shape_elem_type * ,
-                                                      void *,
-                                                      const size_t,
-                                                      const size_t,
-                                                      const shape_elem_type * ,
-                                                      const shape_elem_type * ,
-                                                      void *,
-                                                      const size_t,
-                                                      const size_t,
-                                                      const shape_elem_type * ,
-                                                      const shape_elem_type * ,
-                                                      const c_dpctl.DPCTLEventVectorRef) except +
-
 
 cpdef utils.dpnp_descriptor dpnp_argmax(utils.dpnp_descriptor in_array1):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype)
@@ -141,81 +117,3 @@ cpdef utils.dpnp_descriptor dpnp_argmin(utils.dpnp_descriptor in_array1):
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
-
-
-cpdef utils.dpnp_descriptor dpnp_where(utils.dpnp_descriptor cond_obj,
-                                       utils.dpnp_descriptor x_obj,
-                                       utils.dpnp_descriptor y_obj):
-    # Convert object type to C enum DPNPFuncType
-    cdef DPNPFuncType cond_c_type = dpnp_dtype_to_DPNPFuncType(cond_obj.dtype)
-    cdef DPNPFuncType x_c_type = dpnp_dtype_to_DPNPFuncType(x_obj.dtype)
-    cdef DPNPFuncType y_c_type = dpnp_dtype_to_DPNPFuncType(y_obj.dtype)
-
-    # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_WHERE_EXT, x_c_type, y_c_type)
-
-    # Create result array
-    cdef shape_type_c cond_shape = cond_obj.shape
-    cdef shape_type_c x_shape = x_obj.shape
-    cdef shape_type_c y_shape = y_obj.shape
-
-    cdef shape_type_c cond_strides = utils.strides_to_vector(cond_obj.strides, cond_shape)
-    cdef shape_type_c x_strides = utils.strides_to_vector(x_obj.strides, x_shape)
-    cdef shape_type_c y_strides = utils.strides_to_vector(y_obj.strides, y_shape)
-
-    cdef shape_type_c cond_x_shape = utils.get_common_shape(cond_shape, x_shape)
-    cdef shape_type_c cond_y_shape = utils.get_common_shape(cond_shape, y_shape)
-    cdef shape_type_c result_shape = utils.get_common_shape(cond_x_shape, cond_y_shape)
-    cdef utils.dpnp_descriptor result
-
-    result_usm_type, result_sycl_queue = utils_py.get_usm_allocations([cond_obj.get_array(),
-                                                                       x_obj.get_array(),
-                                                                       y_obj.get_array()])
-
-    # get FPTR function and return type
-    cdef where_func_ptr_t func = < where_func_ptr_t > kernel_data.ptr
-    cdef DPNPFuncType return_type = kernel_data.return_type
-
-    """ Create result array with type given by FPTR data """
-    result = utils.create_output_descriptor(result_shape,
-                                            return_type,
-                                            None,
-                                            device=None,
-                                            usm_type=result_usm_type,
-                                            sycl_queue=result_sycl_queue)
-
-    cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
-
-    result_obj = result.get_array()
-
-    cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > result_obj.sycl_queue
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
-
-    """ Call FPTR function """
-    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
-                                                    result.get_data(),
-                                                    result.size,
-                                                    result.ndim,
-                                                    result_shape.data(),
-                                                    result_strides.data(),
-                                                    cond_obj.get_data(),
-                                                    cond_obj.size,
-                                                    cond_obj.ndim,
-                                                    cond_shape.data(),
-                                                    cond_strides.data(),
-                                                    x_obj.get_data(),
-                                                    x_obj.size,
-                                                    x_obj.ndim,
-                                                    x_shape.data(),
-                                                    x_strides.data(),
-                                                    y_obj.get_data(),
-                                                    y_obj.size,
-                                                    y_obj.ndim,
-                                                    y_shape.data(),
-                                                    y_strides.data(),
-                                                    NULL)  # dep_events_ref)
-
-    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-    c_dpctl.DPCTLEvent_Delete(event_ref)
-
-    return result

From 40aa693ea06bff7c5f974541461f0db6226acd06 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 17 Apr 2023 14:19:24 +0200
Subject: [PATCH 116/129] Reuse dpctl.tensor.where in dpnp.where

---
 dpnp/dpnp_iface_searching.py                  | 37 ++++++++++---------
 tests/test_indexing.py                        | 19 ++++++++++
 .../cupy/sorting_tests/test_search.py         |  3 --
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py
index a0b17f4845ef..0a72ecae0f80 100644
--- a/dpnp/dpnp_iface_searching.py
+++ b/dpnp/dpnp_iface_searching.py
@@ -44,7 +44,10 @@
 from dpnp.dpnp_utils import *
 
 import dpnp
+from dpnp.dpnp_array import dpnp_array
+
 import numpy
+import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -181,7 +184,7 @@ def where(condition, x=None, y=None, /):
     Return elements chosen from `x` or `y` depending on `condition`.
 
     When only `condition` is provided, this function is a shorthand for
-    :obj:`dpnp.nonzero(condition)`. 
+    :obj:`dpnp.nonzero(condition)`.
 
     For full documentation refer to :obj:`numpy.where`.
 
@@ -193,12 +196,13 @@ def where(condition, x=None, y=None, /):
 
     Limitations
     -----------
-    Parameters `condition`, `x` and `y` are supported as either scalar, :class:`dpnp.ndarray`
+    Parameter `condition` is supported as either :class:`dpnp.ndarray`
     or :class:`dpctl.tensor.usm_ndarray`.
+    Parameters `x` and `y` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`.
     Otherwise the function will be executed sequentially on CPU.
-    Data type of `condition` parameter is limited by :obj:`dpnp.bool`.
     Input array data types of `x` and `y` are limited by supported DPNP :ref:`Data types`.
-        
+
     See Also
     --------
     :obj:`nonzero` : The function that is called when `x` and `y`are omitted.
@@ -220,18 +224,17 @@ def where(condition, x=None, y=None, /):
     elif missing == 2:
         return dpnp.nonzero(condition)
     elif missing == 0:
-        # get USM type and queue to copy scalar from the host memory into a USM allocation
-        usm_type, queue = get_usm_allocations([condition, x, y])
-
-        c_desc = dpnp.get_dpnp_descriptor(condition, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                          alloc_usm_type=usm_type, alloc_queue=queue)
-        x_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                          alloc_usm_type=usm_type, alloc_queue=queue)
-        y_desc = dpnp.get_dpnp_descriptor(y, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                          alloc_usm_type=usm_type, alloc_queue=queue)
-        if c_desc and x_desc and y_desc:
-            if c_desc.dtype != dpnp.bool:
-                raise TypeError("condition must be a boolean array")
-            return dpnp_where(c_desc, x_desc, y_desc).get_pyobj()
+        check_input_type = lambda x: isinstance(x, (dpnp_array, dpt.usm_ndarray))
+        if check_input_type(condition):
+            if numpy.isscalar(x) or numpy.isscalar(y):
+                # get USM type and queue to copy scalar from the host memory into a USM allocation
+                usm_type, queue = get_usm_allocations([condition, x, y])
+                x = dpt.asarray(x, usm_type=usm_type, sycl_queue=queue) if numpy.isscalar(x) else x
+                y = dpt.asarray(y, usm_type=usm_type, sycl_queue=queue) if numpy.isscalar(y) else y
+            if check_input_type(x) and check_input_type(y):
+                dpt_condition = condition.get_array() if isinstance(condition, dpnp_array) else condition
+                dpt_x = x.get_array() if isinstance(x, dpnp_array) else x
+                dpt_y = y.get_array() if isinstance(y, dpnp_array) else y
+                return dpnp_array._create_from_usm_ndarray(dpt.where(dpt_condition, dpt_x, dpt_y))
 
     return call_origin(numpy.where, condition, x, y)
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index fb49d8c87495..022d42ca1b0f 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -581,3 +581,22 @@ def test_triu_indices_from(array, k):
     result = dpnp.triu_indices_from(ia, k)
     expected = numpy.triu_indices_from(a, k)
     assert_array_equal(expected, result)
+
+
+@pytest.mark.parametrize("cond_dtype", get_all_dtypes())
+@pytest.mark.parametrize("scalar_dtype", get_all_dtypes(no_none=True))
+def test_where_with_scalars(cond_dtype, scalar_dtype):
+    a = numpy.array([-1, 0, 1, 0], dtype=cond_dtype)
+    ia = dpnp.array(a)
+
+    result = dpnp.where(ia, scalar_dtype(1), scalar_dtype(0))
+    expected = numpy.where(a, scalar_dtype(1), scalar_dtype(0))
+    assert_array_equal(expected, result)
+
+    result = dpnp.where(ia, ia*2, scalar_dtype(0))
+    expected = numpy.where(a, a*2, scalar_dtype(0))
+    assert_array_equal(expected, result)
+
+    result = dpnp.where(ia, scalar_dtype(1), dpnp.array(0))
+    expected = numpy.where(a, scalar_dtype(1), numpy.array(0))
+    assert_array_equal(expected, result)
diff --git a/tests/third_party/cupy/sorting_tests/test_search.py b/tests/third_party/cupy/sorting_tests/test_search.py
index 17751aed75c0..b0531244a2fb 100644
--- a/tests/third_party/cupy/sorting_tests/test_search.py
+++ b/tests/third_party/cupy/sorting_tests/test_search.py
@@ -262,7 +262,6 @@ def test_argminmax_dtype(self, in_dtype, result_dtype):
     {'cond_shape': (2, 3, 4), 'x_shape': (2, 3, 4), 'y_shape': (3, 4)},
     {'cond_shape': (3, 4), 'x_shape': (2, 3, 4), 'y_shape': (4,)},
 )
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestWhereTwoArrays(unittest.TestCase):
 
@@ -274,8 +273,6 @@ def test_where_two_arrays(self, xp, cond_type, x_type, y_type):
         # Almost all values of a matrix `shaped_random` makes are not zero.
         # To make a sparse matrix, we need multiply `m`.
         cond = testing.shaped_random(self.cond_shape, xp, cond_type) * m
-        if xp is cupy:
-            cond = cond.astype(cupy.bool)
         x = testing.shaped_random(self.x_shape, xp, x_type, seed=0)
         y = testing.shaped_random(self.y_shape, xp, y_type, seed=1)
         return xp.where(cond, x, y)

From b69ab1e46a83112e21eb0b958b34c98229d73e4c Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 17 Apr 2023 22:46:34 +0200
Subject: [PATCH 117/129] Fix small remark

---
 dpnp/dpnp_algo/dpnp_algo_searching.pxi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/dpnp_algo/dpnp_algo_searching.pxi b/dpnp/dpnp_algo/dpnp_algo_searching.pxi
index 55d2ff6fe440..46f1c83f42b5 100644
--- a/dpnp/dpnp_algo/dpnp_algo_searching.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_searching.pxi
@@ -37,7 +37,7 @@ and the rest of the library
 
 __all__ += [
     "dpnp_argmax",
-    "dpnp_argmin",
+    "dpnp_argmin"
 ]
 
 

From 85f0b2c09bb251dafb96b051b5737ec6944bcc81 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Tue, 6 Jun 2023 09:58:11 -0500
Subject: [PATCH 118/129] Remove debug empty print from test_eigenvalue.py

---
 tests/third_party/cupy/linalg_tests/test_eigenvalue.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/third_party/cupy/linalg_tests/test_eigenvalue.py b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py
index fe577e32b285..80ff92a1093f 100644
--- a/tests/third_party/cupy/linalg_tests/test_eigenvalue.py
+++ b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py
@@ -85,7 +85,6 @@ def test_eigh_batched(self, xp, dtype):
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4, contiguous_check=False)
     def test_eigh_complex_batched(self, xp, dtype):
-        print()
         a = xp.array([[[1, 2j, 3], [4j, 5, 6j], [7, 8j, 9]],
                       [[0, 2j, 3], [4j, 4, 6j], [7, 8j, 8]]], dtype)
         w, v = xp.linalg.eigh(a, UPLO=self.UPLO)

From c088ee9ce2158d037ebb0d2dc25c705494a065fe Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 7 Jun 2023 11:03:42 +0200
Subject: [PATCH 119/129] Pin minimum version of cmake >=3.21 for
 generate_coverage

---
 .github/workflows/generate_coverage.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index fd38dde9a5d3..bd3c8c366f0e 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -41,7 +41,7 @@ jobs:
           sudo apt-get install lcov
       - name: Install dpnp dependencies
         run: |
-          conda install cython llvm cmake scikit-build ninja pytest pytest-cov coverage[toml] \
+          conda install cython llvm cmake">=3.21" scikit-build ninja pytest pytest-cov coverage[toml] \
               dpctl dpcpp_linux-64 sysroot_linux-64">=2.28" mkl-devel-dpcpp tbb-devel onedpl-devel ${{ env.CHANNELS }}
       - name: Conda info
         run: |

From bea6107e37460e96667eb07be451378466b859bc Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 7 Jun 2023 11:07:26 +0200
Subject: [PATCH 120/129] Set a new coverage badge

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b10394c0eec0..c042dd0f256f 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 [![Pre-commit](https://github.com/IntelPython/dpnp/actions/workflows/pre-commit.yml/badge.svg?branch=master&event=push)](https://github.com/IntelPython/dpnp/actions/workflows/pre-commit.yml)
 [![Conda package](https://github.com/IntelPython/dpnp/actions/workflows/conda-package.yml/badge.svg?branch=master&event=push)](https://github.com/IntelPython/dpnp/actions/workflows/conda-package.yml)
-[![codecov](https://codecov.io/gh/IntelPython/dpnp/branch/master/graph/badge.svg)](https://codecov.io/gh/IntelPython/dpnp)
+[![Coverage Status](https://coveralls.io/repos/github/IntelPython/dpnp/badge.svg?branch=master)](https://coveralls.io/github/IntelPython/dpnp?branch=master)
 [![Build Sphinx](https://github.com/IntelPython/dpnp/workflows/Build%20Sphinx/badge.svg)](https://intelpython.github.io/dpnp)
 
 # DPNP - Data Parallel Extension for NumPy*

From d4bd63abbd7ee3e8d1319626a30fb8680fb4eaea Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Thu, 1 Jun 2023 08:06:03 -0500
Subject: [PATCH 121/129] Improve dpnp.div() implementation:     - reuse
 dpctl.tensor.div()     - write pybind11 extension for div() from OneMKL VM

---
 dpnp/CMakeLists.txt                         |   1 +
 dpnp/backend/extensions/vm/CMakeLists.txt   |  75 +++++
 dpnp/backend/extensions/vm/div.cpp          | 307 ++++++++++++++++++++
 dpnp/backend/extensions/vm/div.hpp          |  57 ++++
 dpnp/backend/extensions/vm/types_matrix.hpp |  67 +++++
 dpnp/backend/extensions/vm/vm_py.cpp        |  71 +++++
 dpnp/dpnp_algo/dpnp_algo.pxd                |   3 -
 dpnp/dpnp_algo/dpnp_algo.pyx                |   2 +-
 dpnp/dpnp_algo/dpnp_algo_mathematical.pxi   |   9 -
 dpnp/dpnp_algo/dpnp_elementwise_common.py   |  84 ++++++
 dpnp/dpnp_iface.py                          |  27 ++
 dpnp/dpnp_iface_mathematical.py             |  28 +-
 tests/test_sycl_queue.py                    |   5 +-
 13 files changed, 708 insertions(+), 28 deletions(-)
 create mode 100644 dpnp/backend/extensions/vm/CMakeLists.txt
 create mode 100644 dpnp/backend/extensions/vm/div.cpp
 create mode 100644 dpnp/backend/extensions/vm/div.hpp
 create mode 100644 dpnp/backend/extensions/vm/types_matrix.hpp
 create mode 100644 dpnp/backend/extensions/vm/vm_py.cpp
 create mode 100644 dpnp/dpnp_algo/dpnp_elementwise_common.py

diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
index 54be4eb23b9a..89524ab1c58d 100644
--- a/dpnp/CMakeLists.txt
+++ b/dpnp/CMakeLists.txt
@@ -48,6 +48,7 @@ endfunction()
 build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.pyx dpnp)
 add_subdirectory(backend)
 add_subdirectory(backend/extensions/lapack)
+add_subdirectory(backend/extensions/vm)
 
 add_subdirectory(dpnp_algo)
 add_subdirectory(dpnp_utils)
diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt
new file mode 100644
index 000000000000..8f3086ec3a9e
--- /dev/null
+++ b/dpnp/backend/extensions/vm/CMakeLists.txt
@@ -0,0 +1,75 @@
+# *****************************************************************************
+# Copyright (c) 2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+set(python_module_name _vm_impl)
+pybind11_add_module(${python_module_name} MODULE
+    vm_py.cpp
+    div.cpp
+)
+
+if (WIN32)
+    if (${CMAKE_VERSION} VERSION_LESS "3.27")
+        # this is a work-around for target_link_options inserting the option after the -link option,
+        # which causes the linker to ignore it.
+        set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel")
+    endif()
+endif()
+
+set_target_properties(${python_module_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
+target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
+
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
+
+if (WIN32)
+  target_compile_options(${python_module_name} PRIVATE
+    /clang:-fno-approx-func
+    /clang:-fno-finite-math-only
+    )
+else()
+  target_compile_options(${python_module_name} PRIVATE
+    -fno-approx-func
+    -fno-finite-math-only
+    )
+endif()
+
+target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel)
+if (UNIX)
+    # this option is supported on Linux only
+    target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code)
+endif()
+
+if (DPNP_GENERATE_COVERAGE)
+    target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping)
+endif()
+
+target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP)
+
+install(TARGETS ${python_module_name}
+  DESTINATION "dpnp/backend/extensions/vm"
+)
diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp
new file mode 100644
index 000000000000..48b64959b88a
--- /dev/null
+++ b/dpnp/backend/extensions/vm/div.cpp
@@ -0,0 +1,307 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+
+#include <pybind11/pybind11.h>
+
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/type_utils.hpp"
+
+#include "div.hpp"
+#include "types_matrix.hpp"
+
+#include "dpnp_utils.hpp"
+
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace vm
+{
+
+namespace mkl_vm = oneapi::mkl::vm;
+namespace py = pybind11;
+namespace type_utils = dpctl::tensor::type_utils;
+
+typedef sycl::event (*div_impl_fn_ptr_t)(
+    sycl::queue, const std::int64_t, const char*, const char*, char*, const std::vector<sycl::event>&);
+
+static div_impl_fn_ptr_t div_dispatch_vector[dpctl_td_ns::num_types];
+
+template <typename T>
+static sycl::event div_impl(sycl::queue exec_q,
+                            const std::int64_t n,
+                            const char* in_a,
+                            const char* in_b,
+                            char* out_y,
+                            const std::vector<sycl::event>& depends)
+{
+    type_utils::validate_type_for_device<T>(exec_q);
+
+    const T* a = reinterpret_cast<const T*>(in_a);
+    const T* b = reinterpret_cast<const T*>(in_b);
+    T* y = reinterpret_cast<T*>(out_y);
+
+    return mkl_vm::div(exec_q,
+                       n, // number of elements to be calculated
+                       a, // pointer `a` containing 1st input vector of size n
+                       b, // pointer `b` containing 2nd input vector of size n
+                       y, // pointer `y` to the output vector of size n
+                       depends);
+}
+
+std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
+                                        dpctl::tensor::usm_ndarray src1,
+                                        dpctl::tensor::usm_ndarray src2,
+                                        dpctl::tensor::usm_ndarray dst, // dst = op(src1, src2), elementwise
+                                        const std::vector<sycl::event>& depends)
+{
+    // all three arrays must share one element type (compared via dpctl lookup ids)
+    int src1_typenum = src1.get_typenum();
+    int src2_typenum = src2.get_typenum();
+    int dst_typenum = dst.get_typenum();
+
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
+    int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum);
+    int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum);
+    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);
+
+    if (src1_typeid != src2_typeid || src2_typeid != dst_typeid)
+    {
+        throw py::value_error("Either any of input arrays or output array have different types");
+    }
+
+    // check that queues are compatible
+    if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst}))
+    {
+        throw py::value_error("Execution queue is not compatible with allocation queues");
+    }
+
+    // check shapes, broadcasting is assumed done by caller
+    // check that dimensions are the same
+    int dst_nd = dst.get_ndim();
+    if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim())
+    {
+        throw py::value_error("Array dimensions are not the same.");
+    }
+
+    // check that shapes are the same; the element count is accumulated along the way
+    const py::ssize_t* src1_shape = src1.get_shape_raw();
+    const py::ssize_t* src2_shape = src2.get_shape_raw();
+    const py::ssize_t* dst_shape = dst.get_shape_raw();
+    bool shapes_equal(true);
+    size_t src_nelems(1);
+
+    for (int i = 0; i < dst_nd; ++i)
+    {
+        src_nelems *= static_cast<size_t>(src1_shape[i]);
+        shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]);
+    }
+    if (!shapes_equal)
+    {
+        throw py::value_error("Array shapes are not the same.");
+    }
+
+    // if nelems is zero, there is nothing to compute
+    if (src_nelems == 0)
+    {
+        return std::make_pair(sycl::event(), sycl::event());
+    }
+
+    // ensure that output is ample enough to accommodate all elements
+    auto dst_offsets = dst.get_minmax_offsets();
+    // destination must be ample enough to accommodate all elements
+    {
+        size_t range = static_cast<size_t>(dst_offsets.second - dst_offsets.first);
+        if (range + 1 < src_nelems)
+        {
+            throw py::value_error(
+                "Destination array can not accommodate all the "
+                "elements of source array.");
+        }
+    }
+
+    // check memory overlap
+    auto const& overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(src1, dst) || overlap(src2, dst))
+    {
+        throw py::value_error("Arrays index overlapping segments of memory");
+    }
+
+    const char* src1_data = src1.get_data();
+    const char* src2_data = src2.get_data();
+    char* dst_data = dst.get_data();
+
+    // only C-contiguous inputs and output are supported
+    bool is_src1_c_contig = src1.is_c_contiguous();
+    bool is_src2_c_contig = src2.is_c_contiguous();
+    bool is_dst_c_contig = dst.is_c_contiguous();
+
+    bool all_c_contig = (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig);
+    if (!all_c_contig)
+    {
+        throw py::value_error("Input and output arrays must be C-contiguous");
+    }
+
+    auto div_fn = div_dispatch_vector[dst_typeid];
+    if (div_fn == nullptr)
+    {
+        throw py::value_error("No div implementation defined");
+    }
+    sycl::event div_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends);
+
+    sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {div_ev});
+    return std::make_pair(ht_ev, div_ev);
+}
+
+bool can_call_div(sycl::queue exec_q,
+                  dpctl::tensor::usm_ndarray src1,
+                  dpctl::tensor::usm_ndarray src2,
+                  dpctl::tensor::usm_ndarray dst)
+{
+    // check type_nums
+    int src1_typenum = src1.get_typenum();
+    int src2_typenum = src2.get_typenum();
+    int dst_typenum = dst.get_typenum();
+
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
+    int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum);
+    int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum);
+    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);
+
+    // types must be the same
+    if (src1_typeid != src2_typeid || src2_typeid != dst_typeid)
+    {
+        return false;
+    }
+
+    // OneMKL VM functions perform a copy on host if no double type support
+    if (!exec_q.get_device().has(sycl::aspect::fp64))
+    {
+        return false;
+    }
+
+    // check that queues are compatible
+    if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst}))
+    {
+        return false;
+    }
+
+    // dimensions must be the same
+    int dst_nd = dst.get_ndim();
+    if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim())
+    {
+        return false;
+    }
+
+    // shapes must be the same
+    const py::ssize_t* src1_shape = src1.get_shape_raw();
+    const py::ssize_t* src2_shape = src2.get_shape_raw();
+    const py::ssize_t* dst_shape = dst.get_shape_raw();
+    bool shapes_equal(true);
+    size_t src_nelems(1);
+
+    for (int i = 0; i < dst_nd; ++i)
+    {
+        src_nelems *= static_cast<size_t>(src1_shape[i]);
+        shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]);
+    }
+    if (!shapes_equal)
+    {
+        return false;
+    }
+
+    // if nelems is zero, return false
+    if (src_nelems == 0)
+    {
+        return false;
+    }
+
+    // ensure that output is ample enough to accommodate all elements
+    auto dst_offsets = dst.get_minmax_offsets();
+    // destination must be ample enough to accommodate all elements
+    {
+        size_t range = static_cast<size_t>(dst_offsets.second - dst_offsets.first);
+        if (range + 1 < src_nelems)
+        {
+            return false;
+        }
+    }
+
+    // check memory overlap
+    auto const& overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(src1, dst) || overlap(src2, dst))
+    {
+        return false;
+    }
+
+    // support only contiguous inputs
+    bool is_src1_c_contig = src1.is_c_contiguous();
+    bool is_src2_c_contig = src2.is_c_contiguous();
+    bool is_dst_c_contig = dst.is_c_contiguous();
+
+    bool all_c_contig = (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig);
+    if (!all_c_contig)
+    {
+        return false;
+    }
+
+    // MKL function is not defined for the type
+    if (div_dispatch_vector[src1_typeid] == nullptr)
+    {
+        return false;
+    }
+    return true;
+}
+
+template <typename fnT, typename T>
+struct DivContigFactory
+{
+    fnT get()
+    {
+        if constexpr (types::DivTypePairSupportFactory<T>::is_defined)
+        {
+            return div_impl<T>;
+        }
+        else
+        {
+            return nullptr;
+        }
+    }
+};
+
+void init_div_dispatch_vector(void)
+{
+    dpctl_td_ns::DispatchVectorBuilder<div_impl_fn_ptr_t, DivContigFactory, dpctl_td_ns::num_types> contig;
+    contig.populate_dispatch_vector(div_dispatch_vector);
+}
+} // namespace vm
+} // namespace ext
+} // namespace backend
+} // namespace dpnp
diff --git a/dpnp/backend/extensions/vm/div.hpp b/dpnp/backend/extensions/vm/div.hpp
new file mode 100644
index 000000000000..d530a156aa4a
--- /dev/null
+++ b/dpnp/backend/extensions/vm/div.hpp
@@ -0,0 +1,57 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <CL/sycl.hpp>
+#include <oneapi/mkl.hpp>
+
+#include <dpctl4pybind11.hpp>
+#include <pybind11/numpy.h>
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace vm
+{
+    extern std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
+                                                   dpctl::tensor::usm_ndarray src1,
+                                                   dpctl::tensor::usm_ndarray src2,
+                                                   dpctl::tensor::usm_ndarray dst,
+                                                   const std::vector<sycl::event>& depends);
+
+    extern bool can_call_div(sycl::queue exec_q,
+                             dpctl::tensor::usm_ndarray src1,
+                             dpctl::tensor::usm_ndarray src2,
+                             dpctl::tensor::usm_ndarray dst);
+
+    extern void init_div_dispatch_vector(void);
+} // namespace vm
+} // namespace ext
+} // namespace backend
+} // namespace dpnp
diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp
new file mode 100644
index 000000000000..af7c9c47e731
--- /dev/null
+++ b/dpnp/backend/extensions/vm/types_matrix.hpp
@@ -0,0 +1,67 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <type_traits>
+
+// dpctl tensor headers
+#include "utils/type_dispatch.hpp"
+
+// dpctl namespace for operations with types
+namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace vm
+{
+namespace types
+{
+    /**
+     * @brief A factory to define pairs of supported types for which
+     * MKL VM library provides support in oneapi::mkl::vm::div<T> function.
+     *
+     * @tparam T Type of input vectors `a` and `b` and of result vector `y`.
+     */
+    template <typename T>
+    struct DivTypePairSupportFactory
+    {
+        static constexpr bool is_defined = std::disjunction<
+            dpctl_td_ns::TypePairDefinedEntry<T, std::complex<double>, T, std::complex<double>>,
+            dpctl_td_ns::TypePairDefinedEntry<T, std::complex<float>, T, std::complex<float>>,
+            dpctl_td_ns::TypePairDefinedEntry<T, double, T, double>,
+            dpctl_td_ns::TypePairDefinedEntry<T, float, T, float>,
+            // fall-through
+            dpctl_td_ns::NotDefinedEntry>::is_defined;
+    };
+} // namespace types
+} // namespace vm
+} // namespace ext
+} // namespace backend
+} // namespace dpnp
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp
new file mode 100644
index 000000000000..9a040b1f59b0
--- /dev/null
+++ b/dpnp/backend/extensions/vm/vm_py.cpp
@@ -0,0 +1,71 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+// This file defines functions of dpnp.backend.extensions.vm._vm_impl extension
+//
+//*****************************************************************************
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "div.hpp"
+
+namespace vm_ext = dpnp::backend::ext::vm;
+namespace py = pybind11;
+
+// populate dispatch vectors
+void init_dispatch_vectors(void)
+{
+    vm_ext::init_div_dispatch_vector();
+}
+
+// populate dispatch tables
+void init_dispatch_tables(void)
+{
+}
+
+PYBIND11_MODULE(_vm_impl, m)
+{
+    init_dispatch_vectors();
+    init_dispatch_tables();
+
+    m.def("_div",
+          &vm_ext::div,
+          "Call `div` from OneMKL VM library to perform element by element division "
+          "of vector `src1` by vector `src2` to resulting vector `dst`",
+          py::arg("sycl_queue"),
+          py::arg("src1"),
+          py::arg("src2"),
+          py::arg("dst"),
+          py::arg("depends") = py::list());
+
+    m.def("_can_call_div",
+          &vm_ext::can_call_div,
+          "Check input arrays to answer if `div` function from OneMKL VM library can be used",
+          py::arg("sycl_queue"),
+          py::arg("src1"),
+          py::arg("src2"),
+          py::arg("dst"));
+}
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index 56195613a338..0120aa6b453a 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -118,7 +118,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_DIAGONAL
         DPNP_FN_DIAGONAL_EXT
         DPNP_FN_DIVIDE
-        DPNP_FN_DIVIDE_EXT
         DPNP_FN_DOT
         DPNP_FN_DOT_EXT
         DPNP_FN_EDIFF1D
@@ -531,8 +530,6 @@ cpdef dpnp_descriptor dpnp_add(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, o
                                dpnp_descriptor out=*, object where=*)
 cpdef dpnp_descriptor dpnp_arctan2(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*,
                                    dpnp_descriptor out=*, object where=*)
-cpdef dpnp_descriptor dpnp_divide(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*,
-                                  dpnp_descriptor out=*, object where=*)
 cpdef dpnp_descriptor dpnp_hypot(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*,
                                  dpnp_descriptor out=*, object where=*)
 cpdef dpnp_descriptor dpnp_maximum(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*,
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 24abd1b4b9e4..8b6d4be73e94 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -499,7 +499,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
     # get FPTR function and return type
     cdef fptr_2in_1out_strides_t func = NULL
     cdef DPNPFuncType return_type = DPNP_FT_NONE
-    if fptr_name != DPNP_FN_DIVIDE_EXT or result_sycl_device.has_aspect_fp64:
+    if result_sycl_device.has_aspect_fp64:
         return_type = kernel_data.return_type
         func = < fptr_2in_1out_strides_t > kernel_data.ptr
     else:
diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
index b5534c7c1a8e..3a002fdd4ba7 100644
--- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
+++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
@@ -47,7 +47,6 @@ __all__ += [
     "dpnp_cumprod",
     "dpnp_cumsum",
     "dpnp_diff",
-    "dpnp_divide",
     "dpnp_ediff1d",
     "dpnp_fabs",
     "dpnp_floor",
@@ -249,14 +248,6 @@ cpdef utils.dpnp_descriptor dpnp_diff(utils.dpnp_descriptor x1, int n):
     return dpnp_diff(res, n - 1)
 
 
-cpdef utils.dpnp_descriptor dpnp_divide(utils.dpnp_descriptor x1_obj,
-                                        utils.dpnp_descriptor x2_obj,
-                                        object dtype=None,
-                                        utils.dpnp_descriptor out=None,
-                                        object where=True):
-    return call_fptr_2in_1out_strides(DPNP_FN_DIVIDE_EXT, x1_obj, x2_obj, dtype, out, where)
-
-
 cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1):
 
     if x1.size <= 1:
diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
new file mode 100644
index 000000000000..15481941f85f
--- /dev/null
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -0,0 +1,84 @@
+# cython: language_level=3
+# distutils: language = c++
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+import dpnp
+from dpnp.dpnp_array import dpnp_array
+import dpnp.backend.extensions.vm._vm_impl as vmi
+
+from dpctl.tensor._elementwise_common import (
+    BinaryElementwiseFunc
+)
+import dpctl.tensor._tensor_impl as ti
+
+
+__all__ = [
+    "dpnp_divide"
+]
+
+
+_divide_docstring_ = """
+divide(x1, x2, out=None, order='K')
+
+Calculates the ratio for each element `x1_i` of the input array `x1` with
+the respective element `x2_i` of the input array `x2`.
+
+Args:
+    x1 (dpnp.ndarray):
+        First input array, expected to have numeric data type.
+    x2 (dpnp.ndarray):
+        Second input array, also expected to have numeric data type.
+Returns:
+    dpnp.ndarray:
+        an array containing the result of element-wise division. The data type
+        of the returned array is determined by the Type Promotion Rules.
+"""
+
+def dpnp_divide(x1, x2, out=None, order='K'):
+    """
+    Invoke div() from the pybind11 extension of OneMKL VM if possible;
+    otherwise rely fully on the dpctl.tensor implementation of divide().
+    The result is returned as a dpnp array created from the usm_ndarray output.
+    """
+
+    def _call_divide(src1, src2, dst, sycl_queue, depends=None):
+        """A callback to register in BinaryElementwiseFunc class of dpctl.tensor"""
+        depends = [] if depends is None else depends  # avoid a shared mutable default
+        if vmi._can_call_div(sycl_queue, src1, src2, dst):
+            # call pybind11 extension for div() function from OneMKL VM
+            return vmi._div(sycl_queue, src1, src2, dst, depends)
+        return ti._divide(src1, src2, dst, sycl_queue, depends)
+
+    # dpctl.tensor only works with usm_ndarray or scalar
+    x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1)
+    x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2)
+    out_usm = None if out is None else dpnp.get_usm_ndarray(out)
+
+    func = BinaryElementwiseFunc("divide", ti._divide_result_type, _call_divide, _divide_docstring_)
+    res_usm = func(x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order)
+    return dpnp_array._create_from_usm_ndarray(res_usm)
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index ce3c540539d6..93629440e7d2 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -69,6 +69,7 @@
     "get_include",
     "get_normalized_queue_device",
     "get_usm_ndarray",
+    "get_usm_ndarray_or_scalar",
     "is_supported_array_type"
 ]
 
@@ -404,6 +405,32 @@ def get_usm_ndarray(a):
     raise TypeError("An array must be any of supported type, but got {}".format(type(a)))
 
 
+def get_usm_ndarray_or_scalar(a):
+    """
+    Pass a scalar through unchanged, otherwise return a USM ndarray for `a`.
+
+    Parameters
+    ----------
+    a : {scalar, dpnp_array, usm_ndarray}
+        Either a scalar or an array object accepted by
+        :func:`get_usm_ndarray`.
+
+    Returns
+    -------
+    out : scalar, usm_ndarray
+        The very same object `a` when it is a scalar.
+        The result of :func:`get_usm_ndarray` for `a` otherwise.
+
+    Raises
+    ------
+    TypeError
+        Propagated from :func:`get_usm_ndarray` for unsupported input types.
+
+    """
+
+    return get_usm_ndarray(a) if not isscalar(a) else a
+
+
 def is_supported_array_type(a):
     """
     Return ``True`` if an array of either type :class:`dpnp.ndarray`
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 98dcc71d31af..a7f5a7e8a0cb 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -40,8 +40,11 @@
 """
 
 
-from dpnp.dpnp_algo import *
-from dpnp.dpnp_utils import *
+from .dpnp_algo import *
+from .dpnp_algo.dpnp_elementwise_common import (
+    dpnp_divide
+)
+from .dpnp_utils import *
 
 import dpnp
 
@@ -586,6 +589,7 @@ def divide(x1,
            out=None,
            *,
            where=True,
+           order='K',
            dtype=None,
            subok=True,
            **kwargs):
@@ -617,28 +621,24 @@ def divide(x1,
 
     """
 
-    if out is not None:
-        pass
-    elif where is not True:
+    if where is not True:
         pass
     elif dtype is not None:
         pass
     elif subok is not True:
         pass
+    elif kwargs:
+        pass
     elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
         # at least either x1 or x2 has to be an array
         pass
     else:
-        # get USM type and queue to copy scalar from the host memory into a USM allocation
-        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
-
-        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
-                                           alloc_usm_type=usm_type, alloc_queue=queue)
-        if x1_desc and x2_desc:
-            return dpnp_divide(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj()
+        if order in "afkcAFKC":
+            order = order.upper()
+        else:
+            raise ValueError("order must be one of 'C', 'F', 'A', or 'K' (got '{}')".format(order))
 
+        return dpnp_divide(x1, x2, out=out, order=order)
     return call_origin(numpy.divide, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index e5e53646e1a1..3182003a90be 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -6,6 +6,9 @@
 
 import dpnp
 import dpctl
+from dpctl.utils import (
+    ExecutionPlacementError
+)
 import numpy
 
 from numpy.testing import (
@@ -431,7 +434,7 @@ def test_broadcasting(func, data1, data2, device):
 def test_2in_1out_diff_queue_but_equal_context(func, device):
     x1 = dpnp.arange(10)
     x2 = dpnp.arange(10, sycl_queue=dpctl.SyclQueue(device))[::-1]
-    with assert_raises(ValueError):
+    with assert_raises((ValueError, ExecutionPlacementError)):
         getattr(dpnp, func)(x1, x2)
 
 

From be8c671448cf39637dd3fd67ef9e4e064d0228e8 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Tue, 6 Jun 2023 09:51:53 -0500
Subject: [PATCH 122/129] Add support of order=None

---
 dpnp/dpnp_iface_mathematical.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index a7f5a7e8a0cb..9e877703b409 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -635,6 +635,9 @@ def divide(x1,
     else:
-        if order in "afkcAFKC":
+        # test for None first: `None in "afkcAFKC"` raises TypeError
+        if order is None:
+            order = 'K'
+        elif order in "afkcAFKC":
             order = order.upper()
         else:
             raise ValueError("order must be one of 'C', 'F', 'A', or 'K' (got '{}')".format(order))
 

From cd4f0d42788d461749ed2da7d37fbc7dac4dcf8b Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 7 Jun 2023 11:58:52 -0500
Subject: [PATCH 123/129] add docstrings for out and order

---
 dpnp/dpnp_algo/dpnp_elementwise_common.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
index 15481941f85f..9c2383e57a0b 100644
--- a/dpnp/dpnp_algo/dpnp_elementwise_common.py
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -53,6 +53,12 @@
         First input array, expected to have numeric data type.
     x2 (dpnp.ndarray):
         Second input array, also expected to have numeric data type.
+    out ({None, dpnp.ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", None, optional):
+        Memory layout of the newly created output array, if parameter `out` is `None`.
+        Default: "K".
 Returns:
     dpnp.ndarray:
         an array containing the result of element-wise division. The data type

From fc7e959c5c0e1a45aa9d62f572670eeb38e164b4 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Fri, 9 Jun 2023 09:15:40 -0500
Subject: [PATCH 124/129] Fix hanging in tril

---
 dpnp/backend/extensions/vm/div.cpp | 5 +++++
 tests/test_mathematical.py         | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp
index 48b64959b88a..6e98e09f3475 100644
--- a/dpnp/backend/extensions/vm/div.cpp
+++ b/dpnp/backend/extensions/vm/div.cpp
@@ -219,6 +219,11 @@ bool can_call_div(sycl::queue exec_q,
     {
         return false;
     }
+    else if (dst_nd == 0)
+    {
+        // don't call OneMKL for 0d arrays
+        return false;
+    }
 
     // shapes must be the same
     const py::ssize_t* src1_shape = src1.get_shape_raw();
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 5f0d73b23b7b..6224eb57ab81 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -284,7 +284,7 @@ def test_divide_scalar(shape, dtype):
 
     result = 0.5 / dpnp_a / 1.7
     expected = 0.5 / np_a / 1.7
-    assert_allclose(result, expected)
+    assert_allclose(result, expected, rtol=1e-6)
 
 
 @pytest.mark.parametrize("shape",

From d9d358edb500a2b9606b4db446988a327a7f4e91 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Fri, 9 Jun 2023 12:10:24 -0500
Subject: [PATCH 125/129] Remove w/a in divide tests for type aligning with
 numpy

---
 tests/third_party/cupy/math_tests/test_arithmetic.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py
index 39dc3e10f721..71f33429c704 100644
--- a/tests/third_party/cupy/math_tests/test_arithmetic.py
+++ b/tests/third_party/cupy/math_tests/test_arithmetic.py
@@ -168,13 +168,6 @@ def check_binary(self, xp):
                         y = y.astype(dtype1)
                     elif is_array_arg2 and not is_array_arg1:
                         y = y.astype(dtype2)
-            elif self.name in ('divide', 'true_divide'):
-                # If one input is an array of float32 and another - an integer or floating scalar,
-                # NumPy will return an output array of float32, while DPNP will return the array of float64,
-                # since NumPy would use the same float64 type when instead of scalar here is array of integer of floating type.
-                if not (is_array_arg1 and is_array_arg2):
-                    if (is_array_arg1 and arg1.dtype == numpy.float32) ^ (is_array_arg2 and arg2.dtype == numpy.float32):
-                        y = y.astype(numpy.float32)
 
         # NumPy returns different values (nan/inf) on division by zero
         # depending on the architecture.

From aa09ab5379f3d174ebf4586e792a165c45e74458 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Fri, 9 Jun 2023 14:46:37 -0500
Subject: [PATCH 126/129] only events from host task has to be passed in
 keep_args_alive()

---
 .github/workflows/conda-package.yml       |  8 +++-
 .github/workflows/generate_coverage.yaml  |  1 +
 dpnp/backend/extensions/vm/CMakeLists.txt | 13 ++++++
 dpnp/backend/extensions/vm/div.cpp        | 56 ++++++++++++++++++++---
 4 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index e42adbdc913d..5645191b7c6e 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -194,8 +194,10 @@ jobs:
       # TODO: run the whole scope once the issues on CPU are resolved
       - name: Run tests
         run: |
-          python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }}
+          python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }}
         working-directory: ${{ env.tests-path }}
+        env:
+          SYCL_QUEUE_THREAD_POOL_SIZE: 16
 
   test_windows:
     name: Test ['windows-latest', python='${{ matrix.python }}']
@@ -331,8 +333,10 @@ jobs:
       # TODO: run the whole scope once the issues on CPU are resolved
       - name: Run tests
         run: |
-          python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }}
+          python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }}
         working-directory: ${{ env.tests-path }}
+        env:
+          SYCL_QUEUE_THREAD_POOL_SIZE: 16
 
   upload:
     name: Upload ['${{ matrix.os }}', python='${{ matrix.python }}']
diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index bd3c8c366f0e..75feca67e560 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -66,6 +66,7 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           COVERALLS_PARALLEL: true
+          SYCL_QUEUE_THREAD_POOL_SIZE: 6
 
   coveralls:
     name: Indicate completion to coveralls.io
diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt
index 8f3086ec3a9e..07a4ffae8aba 100644
--- a/dpnp/backend/extensions/vm/CMakeLists.txt
+++ b/dpnp/backend/extensions/vm/CMakeLists.txt
@@ -55,7 +55,9 @@ else()
   target_compile_options(${python_module_name} PRIVATE
     -fno-approx-func
     -fno-finite-math-only
+    -no-ipo
     )
+    target_link_options(${python_module_name} PRIVATE -no-ipo)
 endif()
 
 target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel)
@@ -70,6 +72,17 @@ endif()
 
 target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP)
 
+target_link_libraries(${python_module_name} PUBLIC oneDPL)
+
+if (UNIX)
+  # needed for STL headers with GCC < 11
+  target_compile_definitions(${python_module_name} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0)
+endif()
+
+target_compile_definitions(${python_module_name} PUBLIC PSTL_USE_PARALLEL_POLICIES=0)
+# work-around for Windows at exit crash with predefined policies
+target_compile_definitions(${python_module_name} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0)
+
 install(TARGETS ${python_module_name}
   DESTINATION "dpnp/backend/extensions/vm"
 )
diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp
index 6e98e09f3475..8a6751a45a44 100644
--- a/dpnp/backend/extensions/vm/div.cpp
+++ b/dpnp/backend/extensions/vm/div.cpp
@@ -64,16 +64,46 @@ static sycl::event div_impl(sycl::queue exec_q,
 {
     type_utils::validate_type_for_device<T>(exec_q);
 
-    const T* a = reinterpret_cast<const T*>(in_a);
-    const T* b = reinterpret_cast<const T*>(in_b);
-    T* y = reinterpret_cast<T*>(out_y);
+    std::cerr << "enter div_impl" << std::endl;
 
-    return mkl_vm::div(exec_q,
+    const T* _a = reinterpret_cast<const T*>(in_a);
+    const T* _b = reinterpret_cast<const T*>(in_b);
+    T* _y = reinterpret_cast<T*>(out_y);
+
+    std::cerr << "casting is done" << std::endl;
+
+    T* a = sycl::malloc_device<T>(n, exec_q);
+    T* b = sycl::malloc_device<T>(n, exec_q);
+    T* y = sycl::malloc_device<T>(n, exec_q);
+
+    std::cerr << "malloc is done" << std::endl;
+
+    exec_q.copy(_a, a, n).wait();
+    exec_q.copy(_b, b, n).wait();
+    exec_q.copy(_y, y, n).wait();
+
+    std::cerr << "copy is done" << std::endl;
+
+    sycl::event ev = mkl_vm::div(exec_q,
                        n, // number of elements to be calculated
                        a, // pointer `a` containing 1st input vector of size n
                        b, // pointer `b` containing 2nd input vector of size n
                        y, // pointer `y` to the output vector of size n
                        depends);
+    ev.wait();
+
+    std::cerr << "div is done" << std::endl;
+
+    exec_q.copy(y, _y, n).wait();
+
+    std::cerr << "copy is done" << std::endl;
+
+    sycl::free(a, exec_q);
+    sycl::free(b, exec_q);
+    sycl::free(y, exec_q);
+
+    std::cerr << "leaving div_impl" << std::endl;
+    return sycl::event();
 }
 
 std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
@@ -175,9 +205,21 @@ std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
         throw py::value_error("No div implementation defined");
     }
     sycl::event sum_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends);
-
-    sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev});
-    return std::make_pair(ht_ev, sum_ev);
+    // sum_ev.wait();
+
+    // int* dummy = sycl::malloc_device<int>(1, exec_q);
+    // sycl::event cleanup_ev = exec_q.submit([&](sycl::handler& cgh) {
+    //     // cgh.depends_on(sum_ev);
+    //     auto ctx = exec_q.get_context();
+    //     cgh.host_task([dummy, ctx]() {
+    //         // dummy host task to pass into keep_args_alive
+    //         sycl::free(dummy, ctx);
+    //     });
+    // });
+
+    // sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev});
+    // return std::make_pair(ht_ev, sum_ev);
+    return std::make_pair(sycl::event(), sycl::event());
 }
 
 bool can_call_div(sycl::queue exec_q,

From 1b4b671fb10c39d01c6d419532f90666dd468041 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 12 Jun 2023 08:24:36 -0500
Subject: [PATCH 127/129] exclude call of div() with MKL 2023.1.0

---
 .github/workflows/conda-package.yml       |  8 +--
 dpnp/backend/extensions/vm/CMakeLists.txt | 13 -----
 dpnp/backend/extensions/vm/div.cpp        | 66 +++++++----------------
 3 files changed, 22 insertions(+), 65 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 5645191b7c6e..fbfe66ff17b9 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -194,10 +194,10 @@ jobs:
       # TODO: run the whole scope once the issues on CPU are resolved
       - name: Run tests
         run: |
-          python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }}
+          python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }}
         working-directory: ${{ env.tests-path }}
         env:
-          SYCL_QUEUE_THREAD_POOL_SIZE: 16
+          SYCL_QUEUE_THREAD_POOL_SIZE: 6
 
   test_windows:
     name: Test ['windows-latest', python='${{ matrix.python }}']
@@ -333,10 +333,10 @@ jobs:
       # TODO: run the whole scope once the issues on CPU are resolved
       - name: Run tests
         run: |
-          python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }}
+          python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }}
         working-directory: ${{ env.tests-path }}
         env:
-          SYCL_QUEUE_THREAD_POOL_SIZE: 16
+          SYCL_QUEUE_THREAD_POOL_SIZE: 6
 
   upload:
     name: Upload ['${{ matrix.os }}', python='${{ matrix.python }}']
diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt
index 07a4ffae8aba..8f3086ec3a9e 100644
--- a/dpnp/backend/extensions/vm/CMakeLists.txt
+++ b/dpnp/backend/extensions/vm/CMakeLists.txt
@@ -55,9 +55,7 @@ else()
   target_compile_options(${python_module_name} PRIVATE
     -fno-approx-func
     -fno-finite-math-only
-    -no-ipo
     )
-    target_link_options(${python_module_name} PRIVATE -no-ipo)
 endif()
 
 target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel)
@@ -72,17 +70,6 @@ endif()
 
 target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP)
 
-target_link_libraries(${python_module_name} PUBLIC oneDPL)
-
-if (UNIX)
-  # needed for STL headers with GCC < 11
-  target_compile_definitions(${python_module_name} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0)
-endif()
-
-target_compile_definitions(${python_module_name} PUBLIC PSTL_USE_PARALLEL_POLICIES=0)
-# work-around for Windows at exit crash with predefined policies
-target_compile_definitions(${python_module_name} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0)
-
 install(TARGETS ${python_module_name}
   DESTINATION "dpnp/backend/extensions/vm"
 )
diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp
index 8a6751a45a44..28fbe1cdf3cc 100644
--- a/dpnp/backend/extensions/vm/div.cpp
+++ b/dpnp/backend/extensions/vm/div.cpp
@@ -64,46 +64,16 @@ static sycl::event div_impl(sycl::queue exec_q,
 {
     type_utils::validate_type_for_device<T>(exec_q);
 
-    std::cerr << "enter div_impl" << std::endl;
+    const T* a = reinterpret_cast<const T*>(in_a);
+    const T* b = reinterpret_cast<const T*>(in_b);
+    T* y = reinterpret_cast<T*>(out_y);
 
-    const T* _a = reinterpret_cast<const T*>(in_a);
-    const T* _b = reinterpret_cast<const T*>(in_b);
-    T* _y = reinterpret_cast<T*>(out_y);
-
-    std::cerr << "casting is done" << std::endl;
-
-    T* a = sycl::malloc_device<T>(n, exec_q);
-    T* b = sycl::malloc_device<T>(n, exec_q);
-    T* y = sycl::malloc_device<T>(n, exec_q);
-
-    std::cerr << "malloc is done" << std::endl;
-
-    exec_q.copy(_a, a, n).wait();
-    exec_q.copy(_b, b, n).wait();
-    exec_q.copy(_y, y, n).wait();
-
-    std::cerr << "copy is done" << std::endl;
-
-    sycl::event ev = mkl_vm::div(exec_q,
+    return mkl_vm::div(exec_q,
                        n, // number of elements to be calculated
                        a, // pointer `a` containing 1st input vector of size n
                        b, // pointer `b` containing 2nd input vector of size n
                        y, // pointer `y` to the output vector of size n
                        depends);
-    ev.wait();
-
-    std::cerr << "div is done" << std::endl;
-
-    exec_q.copy(y, _y, n).wait();
-
-    std::cerr << "copy is done" << std::endl;
-
-    sycl::free(a, exec_q);
-    sycl::free(b, exec_q);
-    sycl::free(y, exec_q);
-
-    std::cerr << "leaving div_impl" << std::endl;
-    return sycl::event();
 }
 
 std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
@@ -205,20 +175,9 @@ std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
         throw py::value_error("No div implementation defined");
     }
     sycl::event sum_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends);
-    // sum_ev.wait();
-
-    // int* dummy = sycl::malloc_device<int>(1, exec_q);
-    // sycl::event cleanup_ev = exec_q.submit([&](sycl::handler& cgh) {
-    //     // cgh.depends_on(sum_ev);
-    //     auto ctx = exec_q.get_context();
-    //     cgh.host_task([dummy, ctx]() {
-    //         // dummy host task to pass into keep_args_alive
-    //         sycl::free(dummy, ctx);
-    //     });
-    // });
-
-    // sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev});
-    // return std::make_pair(ht_ev, sum_ev);
+
+    sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev});
+    return std::make_pair(ht_ev, sum_ev);
     return std::make_pair(sycl::event(), sycl::event());
 }
 
@@ -227,6 +186,7 @@ bool can_call_div(sycl::queue exec_q,
                   dpctl::tensor::usm_ndarray src2,
                   dpctl::tensor::usm_ndarray dst)
 {
+#if INTEL_MKL_VERSION >= 20230002
     // check type_nums
     int src1_typenum = src1.get_typenum();
     int src2_typenum = src2.get_typenum();
@@ -325,6 +285,16 @@ bool can_call_div(sycl::queue exec_q,
         return false;
     }
     return true;
+#else
+    // In OneMKL 2023.1.0 the call of oneapi::mkl::vm::div() ends up in a deadlock
+    // inside ~usm_wrapper_to_host()->{...; q_->wait_and_throw(); ...}
+
+    (void)exec_q;
+    (void)src1;
+    (void)src2;
+    (void)dst;
+    return false;
+#endif // INTEL_MKL_VERSION >= 20230002
 }
 
 template <typename fnT, typename T>

From 2e9b9e398711aa33b188e5fc20d40f01d20802e3 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 12 Jun 2023 11:07:19 -0500
Subject: [PATCH 128/129] w/a for MKL calls

---
 dpnp/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dpnp/__init__.py b/dpnp/__init__.py
index 9478c3aef6c8..fc3fc5ff33fc 100644
--- a/dpnp/__init__.py
+++ b/dpnp/__init__.py
@@ -41,6 +41,8 @@
         os.add_dll_directory(dpctlpath)
     os.environ["PATH"] = os.pathsep.join([os.getenv("PATH", ""), mypath, dpctlpath])
 
+# workaround against hanging in OneMKL calls
+os.environ.setdefault('SYCL_QUEUE_THREAD_POOL_SIZE', '6')
 
 from dpnp.dpnp_array import dpnp_array as ndarray
 from dpnp.dpnp_flatiter import flatiter as flatiter

From a7aa9afc67edef24cc72091a35cdd05b2fc6ee26 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 7 Jun 2023 14:10:41 -0500
Subject: [PATCH 129/129] Update versions

---
 .github/workflows/build-sphinx.yml       | 4 ++--
 .github/workflows/conda-package.yml      | 8 ++++----
 .github/workflows/generate_coverage.yaml | 2 +-
 .github/workflows/pre-commit.yml         | 4 ++--
 doc/conf.py                              | 2 +-
 dpnp/backend/doc/Doxyfile                | 2 +-
 dpnp/version.py                          | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml
index f4e3b74c6237..38c001f0999c 100644
--- a/.github/workflows/build-sphinx.yml
+++ b/.github/workflows/build-sphinx.yml
@@ -55,7 +55,7 @@ jobs:
           sudo apt-get install -y nvidia-cuda-toolkit clinfo
 
       - name: Checkout repo
-        uses: actions/checkout@v3.3.0
+        uses: actions/checkout@v3.5.2
 
       # https://github.com/marketplace/actions/setup-miniconda
       - name: Setup miniconda
@@ -108,7 +108,7 @@ jobs:
         if: |
           !github.event.pull_request.head.repo.fork  &&
           (github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/'))
-        uses: peaceiris/actions-gh-pages@v3.9.2
+        uses: peaceiris/actions-gh-pages@v3.9.3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           publish_dir: doc/_build/html/
diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index fbfe66ff17b9..2f9a94db6914 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -50,7 +50,7 @@ jobs:
           access_token: ${{ github.token }}
 
       - name: Checkout DPNP repo
-        uses: actions/checkout@v3.3.0
+        uses: actions/checkout@v3.5.2
         with:
           fetch-depth: 0
 
@@ -76,7 +76,7 @@ jobs:
         run: conda install conda-build
 
       - name: Cache conda packages
-        uses: actions/cache@v3.2.6
+        uses: actions/cache@v3.3.0
         env:
           CACHE_NUMBER: 1  # Increase to reset cache
         with:
@@ -169,7 +169,7 @@ jobs:
           TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}'
 
       - name: Cache conda packages
-        uses: actions/cache@v3.2.6
+        uses: actions/cache@v3.3.0
         env:
           CACHE_NUMBER: 1 # Increase to reset cache
         with:
@@ -294,7 +294,7 @@ jobs:
         run: more lockfile
 
       - name: Cache conda packages
-        uses: actions/cache@v3.2.6
+        uses: actions/cache@v3.3.0
         env:
           CACHE_NUMBER: 1  # Increase to reset cache
         with:
diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 75feca67e560..d4158d212d43 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -24,7 +24,7 @@ jobs:
           access_token: ${{ github.token }}
 
       - name: Checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v3.5.2
         with:
           fetch-depth: 0
 
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 770aea0dc65a..905f4b6c3656 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -9,8 +9,8 @@ jobs:
   pre-commit:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3.3.0
-    - uses: actions/setup-python@v4.5.0
+    - uses: actions/checkout@v3.5.2
+    - uses: actions/setup-python@v4.6.1
       with:
         python-version: '3.10'
     - uses: pre-commit/action@v3.0.0
diff --git a/doc/conf.py b/doc/conf.py
index 532715c4e44d..14ad9efe447e 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -33,7 +33,7 @@
 # The short X.Y version
 version = '0.11'
 # The full version, including alpha/beta/rc tags
-release = '0.11.1'
+release = '0.11.2dev1'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/dpnp/backend/doc/Doxyfile b/dpnp/backend/doc/Doxyfile
index 3d6c971a7991..25dbd8972c48 100644
--- a/dpnp/backend/doc/Doxyfile
+++ b/dpnp/backend/doc/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "DPNP C++ backend kernel library"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 0.11.1
+PROJECT_NUMBER         = 0.11.2dev1
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/dpnp/version.py b/dpnp/version.py
index f09ea3c76a75..8fa967380a84 100644
--- a/dpnp/version.py
+++ b/dpnp/version.py
@@ -29,6 +29,6 @@
 DPNP version module
 """
 
-__version__: str = '0.11.1'
+__version__: str = '0.11.2dev1'
 
 version: str = __version__