diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 74e387f91a..f717f4b309 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -486,7 +486,7 @@ jobs:
           done
 
   array-api-conformity:
-    needs: test_linux
+    needs: build_linux
     runs-on:  ${{ matrix.runner }}
 
     strategy:
diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd
index 630b80bb55..3f7ba63a55 100644
--- a/dpctl/_backend.pxd
+++ b/dpctl/_backend.pxd
@@ -299,6 +299,7 @@ cdef extern from "syclinterface/dpctl_sycl_platform_manager.h":
 
 
 cdef extern from "syclinterface/dpctl_sycl_platform_interface.h":
+    cdef bool DPCTLPlatform_AreEq(const DPCTLSyclPlatformRef, const DPCTLSyclPlatformRef)
     cdef DPCTLSyclPlatformRef DPCTLPlatform_Copy(const DPCTLSyclPlatformRef)
     cdef DPCTLSyclPlatformRef DPCTLPlatform_Create()
     cdef DPCTLSyclPlatformRef DPCTLPlatform_CreateFromSelector(
@@ -308,6 +309,7 @@ cdef extern from "syclinterface/dpctl_sycl_platform_interface.h":
     cdef const char *DPCTLPlatform_GetName(const DPCTLSyclPlatformRef)
     cdef const char *DPCTLPlatform_GetVendor(const DPCTLSyclPlatformRef)
     cdef const char *DPCTLPlatform_GetVersion(const DPCTLSyclPlatformRef)
+    cdef size_t DPCTLPlatform_Hash(const DPCTLSyclPlatformRef)
     cdef DPCTLPlatformVectorRef DPCTLPlatform_GetPlatforms()
     cdef DPCTLSyclContextRef DPCTLPlatform_GetDefaultContext(
         const DPCTLSyclPlatformRef)
diff --git a/dpctl/_sycl_platform.pxd b/dpctl/_sycl_platform.pxd
index d8d737d874..2f619203d6 100644
--- a/dpctl/_sycl_platform.pxd
+++ b/dpctl/_sycl_platform.pxd
@@ -21,6 +21,8 @@
     SYCL platform-related helper functions.
 """
 
+from libcpp cimport bool
+
 from ._backend cimport DPCTLSyclDeviceSelectorRef, DPCTLSyclPlatformRef
 
 
@@ -40,6 +42,7 @@ cdef class SyclPlatform(_SyclPlatform):
     cdef int _init_from_selector(self, DPCTLSyclDeviceSelectorRef DSRef)
     cdef int _init_from__SyclPlatform(self, _SyclPlatform other)
     cdef DPCTLSyclPlatformRef get_platform_ref(self)
+    cdef bool equals(self, SyclPlatform)
 
 
 cpdef list get_platforms()
diff --git a/dpctl/_sycl_platform.pyx b/dpctl/_sycl_platform.pyx
index 449ad9029f..589c8ae895 100644
--- a/dpctl/_sycl_platform.pyx
+++ b/dpctl/_sycl_platform.pyx
@@ -21,10 +21,13 @@
 """ Implements SyclPlatform Cython extension type.
 """
 
+from libcpp cimport bool
+
 from ._backend cimport (  # noqa: E211
     DPCTLCString_Delete,
     DPCTLDeviceSelector_Delete,
     DPCTLFilterSelector_Create,
+    DPCTLPlatform_AreEq,
     DPCTLPlatform_Copy,
     DPCTLPlatform_Create,
     DPCTLPlatform_CreateFromSelector,
@@ -35,6 +38,7 @@ from ._backend cimport (  # noqa: E211
     DPCTLPlatform_GetPlatforms,
     DPCTLPlatform_GetVendor,
     DPCTLPlatform_GetVersion,
+    DPCTLPlatform_Hash,
     DPCTLPlatformMgr_GetInfo,
     DPCTLPlatformMgr_PrintInfo,
     DPCTLPlatformVector_Delete,
@@ -274,6 +278,42 @@ cdef class SyclPlatform(_SyclPlatform):
         else:
             return SyclContext._create(CRef)
 
+    cdef bool equals(self, SyclPlatform other):
+        """
+        Returns true if the :class:`dpctl.SyclPlatform` argument has the
+        same underlying ``DPCTLSyclPlatformRef`` object as this
+        :class:`dpctl.SyclPlatform` instance.
+
+        Returns:
+            :obj:`bool`: ``True`` if the two :class:`dpctl.SyclPlatform` objects
+            point to the same ``DPCTLSyclPlatformRef`` object, otherwise
+            ``False``.
+        """
+        return DPCTLPlatform_AreEq(self._platform_ref, other.get_platform_ref())
+
+    def __eq__(self, other):
+        """
+        Returns True if the :class:`dpctl.SyclPlatform` argument has the
+        same underlying ``DPCTLSyclPlatformRef`` object as this
+        :class:`dpctl.SyclPlatform` instance.
+
+        Returns:
+            :obj:`bool`: ``True`` if the two :class:`dpctl.SyclPlatform` objects
+            point to the same ``DPCTLSyclPlatformRef`` object, otherwise
+            ``False``.
+        """
+        if isinstance(other, SyclPlatform):
+            return self.equals(<SyclPlatform> other)
+        else:
+            return False
+
+    def __hash__(self):
+        """
+        Returns a hash value by hashing the underlying ``sycl::platform`` object.
+
+        """
+        return DPCTLPlatform_Hash(self._platform_ref)
+
 
 def lsplatform(verbosity=0):
     """
diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd
index 729adfc3cb..c906ada4d6 100644
--- a/dpctl/_sycl_queue.pxd
+++ b/dpctl/_sycl_queue.pxd
@@ -29,7 +29,7 @@ from ._sycl_event cimport SyclEvent
 from .program._program cimport SyclKernel
 
 
-cdef void default_async_error_handler(int) nogil except *
+cdef void default_async_error_handler(int) except * nogil
 
 cdef public api class _SyclQueue [
     object Py_SyclQueueObject, type Py_SyclQueueType
diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py
index 0134998f7a..f1ec439f3a 100644
--- a/dpctl/tensor/__init__.py
+++ b/dpctl/tensor/__init__.py
@@ -135,6 +135,8 @@
     logical_not,
     logical_or,
     logical_xor,
+    maximum,
+    minimum,
     multiply,
     negative,
     not_equal,
@@ -274,6 +276,8 @@
     "log1p",
     "log2",
     "log10",
+    "maximum",
+    "minimum",
     "multiply",
     "negative",
     "not_equal",
diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py
index e759571790..eab1febb89 100644
--- a/dpctl/tensor/_copy_utils.py
+++ b/dpctl/tensor/_copy_utils.py
@@ -13,6 +13,7 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+import builtins
 import operator
 
 import numpy as np
@@ -245,6 +246,23 @@ def _broadcast_shapes(sh1, sh2):
         ).shape
 
 
+def _broadcast_strides(X_shape, X_strides, res_ndim):
+    """
+    Broadcasts strides to match the given dimensions;
+    returns tuple type strides.
+    """
+    out_strides = [0] * res_ndim
+    X_shape_len = len(X_shape)
+    str_dim = -X_shape_len
+    for i in range(X_shape_len):
+        shape_value = X_shape[i]
+        if not shape_value == 1:
+            out_strides[str_dim] = X_strides[i]
+        str_dim += 1
+
+    return tuple(out_strides)
+
+
 def _copy_from_usm_ndarray_to_usm_ndarray(dst, src):
     if any(
         not isinstance(arg, dpt.usm_ndarray)
@@ -267,7 +285,7 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src):
     except ValueError as exc:
         raise ValueError("Shapes of two arrays are not compatible") from exc
 
-    if dst.size < src.size:
+    if dst.size < src.size and dst.size < np.prod(common_shape):
         raise ValueError("Destination is smaller ")
 
     if len(common_shape) > dst.ndim:
@@ -278,17 +296,127 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src):
         common_shape = common_shape[ones_count:]
 
     if src.ndim < len(common_shape):
-        new_src_strides = (0,) * (len(common_shape) - src.ndim) + src.strides
+        new_src_strides = _broadcast_strides(
+            src.shape, src.strides, len(common_shape)
+        )
+        src_same_shape = dpt.usm_ndarray(
+            common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides
+        )
+    elif src.ndim == len(common_shape):
+        new_src_strides = _broadcast_strides(
+            src.shape, src.strides, len(common_shape)
+        )
         src_same_shape = dpt.usm_ndarray(
             common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides
         )
     else:
-        src_same_shape = src
-        src_same_shape.shape = common_shape
+        # since broadcasting succeeded, src.ndim is greater because of
+        # leading sequence of ones, so we trim it
+        n = len(common_shape)
+        new_src_strides = _broadcast_strides(
+            src.shape[-n:], src.strides[-n:], n
+        )
+        src_same_shape = dpt.usm_ndarray(
+            common_shape,
+            dtype=src.dtype,
+            buffer=src.usm_data,
+            strides=new_src_strides,
+            offset=src._element_offset,
+        )
 
     _copy_same_shape(dst, src_same_shape)
 
 
+def _empty_like_orderK(X, dt, usm_type=None, dev=None):
+    """Returns empty array like `x`, using order='K'
+
+    For an array `x` that was obtained by permutation of a contiguous
+    array the returned array will have the same shape and the same
+    strides as `x`.
+    """
+    if not isinstance(X, dpt.usm_ndarray):
+        raise TypeError(f"Expected usm_ndarray, got {type(X)}")
+    if usm_type is None:
+        usm_type = X.usm_type
+    if dev is None:
+        dev = X.device
+    fl = X.flags
+    if fl["C"] or X.size <= 1:
+        return dpt.empty_like(
+            X, dtype=dt, usm_type=usm_type, device=dev, order="C"
+        )
+    elif fl["F"]:
+        return dpt.empty_like(
+            X, dtype=dt, usm_type=usm_type, device=dev, order="F"
+        )
+    st = list(X.strides)
+    perm = sorted(
+        range(X.ndim), key=lambda d: builtins.abs(st[d]), reverse=True
+    )
+    inv_perm = sorted(range(X.ndim), key=lambda i: perm[i])
+    st_sorted = [st[i] for i in perm]
+    sh = X.shape
+    sh_sorted = tuple(sh[i] for i in perm)
+    R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C")
+    if min(st_sorted) < 0:
+        sl = tuple(
+            slice(None, None, -1)
+            if st_sorted[i] < 0
+            else slice(None, None, None)
+            for i in range(X.ndim)
+        )
+        R = R[sl]
+    return dpt.permute_dims(R, inv_perm)
+
+
+def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev):
+    if not isinstance(X1, dpt.usm_ndarray):
+        raise TypeError(f"Expected usm_ndarray, got {type(X1)}")
+    if not isinstance(X2, dpt.usm_ndarray):
+        raise TypeError(f"Expected usm_ndarray, got {type(X2)}")
+    nd1 = X1.ndim
+    nd2 = X2.ndim
+    if nd1 > nd2 and X1.shape == res_shape:
+        return _empty_like_orderK(X1, dt, usm_type, dev)
+    elif nd1 < nd2 and X2.shape == res_shape:
+        return _empty_like_orderK(X2, dt, usm_type, dev)
+    fl1 = X1.flags
+    fl2 = X2.flags
+    if fl1["C"] or fl2["C"]:
+        return dpt.empty(
+            res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C"
+        )
+    if fl1["F"] and fl2["F"]:
+        return dpt.empty(
+            res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F"
+        )
+    st1 = list(X1.strides)
+    st2 = list(X2.strides)
+    max_ndim = max(nd1, nd2)
+    st1 += [0] * (max_ndim - len(st1))
+    st2 += [0] * (max_ndim - len(st2))
+    perm = sorted(
+        range(max_ndim),
+        key=lambda d: (builtins.abs(st1[d]), builtins.abs(st2[d])),
+        reverse=True,
+    )
+    inv_perm = sorted(range(max_ndim), key=lambda i: perm[i])
+    st1_sorted = [st1[i] for i in perm]
+    st2_sorted = [st2[i] for i in perm]
+    sh = res_shape
+    sh_sorted = tuple(sh[i] for i in perm)
+    R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C")
+    if max(min(st1_sorted), min(st2_sorted)) < 0:
+        sl = tuple(
+            slice(None, None, -1)
+            if (st1_sorted[i] < 0 and st2_sorted[i] < 0)
+            else slice(None, None, None)
+            for i in range(nd1)
+        )
+        R = R[sl]
+    return dpt.permute_dims(R, inv_perm)
+
+
 def copy(usm_ary, order="K"):
     """copy(ary, order="K")
 
@@ -334,28 +462,15 @@ def copy(usm_ary, order="K"):
             "Unrecognized value of the order keyword. "
             "Recognized values are 'A', 'C', 'F', or 'K'"
         )
-    c_contig = usm_ary.flags.c_contiguous
-    f_contig = usm_ary.flags.f_contiguous
-    R = dpt.usm_ndarray(
-        usm_ary.shape,
-        dtype=usm_ary.dtype,
-        buffer=usm_ary.usm_type,
-        order=copy_order,
-        buffer_ctor_kwargs={"queue": usm_ary.sycl_queue},
-    )
-    if order == "K" and (not c_contig and not f_contig):
-        original_strides = usm_ary.strides
-        ind = sorted(
-            range(usm_ary.ndim),
-            key=lambda i: abs(original_strides[i]),
-            reverse=True,
-        )
-        new_strides = tuple(R.strides[ind[i]] for i in ind)
+    if order == "K":
+        R = _empty_like_orderK(usm_ary, usm_ary.dtype)
+    else:
         R = dpt.usm_ndarray(
             usm_ary.shape,
             dtype=usm_ary.dtype,
-            buffer=R.usm_data,
-            strides=new_strides,
+            buffer=usm_ary.usm_type,
+            order=copy_order,
+            buffer_ctor_kwargs={"queue": usm_ary.sycl_queue},
         )
     _copy_same_shape(R, usm_ary)
     return R
@@ -432,26 +547,15 @@ def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True):
             "Unrecognized value of the order keyword. "
             "Recognized values are 'A', 'C', 'F', or 'K'"
         )
-    R = dpt.usm_ndarray(
-        usm_ary.shape,
-        dtype=target_dtype,
-        buffer=usm_ary.usm_type,
-        order=copy_order,
-        buffer_ctor_kwargs={"queue": usm_ary.sycl_queue},
-    )
-    if order == "K" and (not c_contig and not f_contig):
-        original_strides = usm_ary.strides
-        ind = sorted(
-            range(usm_ary.ndim),
-            key=lambda i: abs(original_strides[i]),
-            reverse=True,
-        )
-        new_strides = tuple(R.strides[ind[i]] for i in ind)
+    if order == "K":
+        R = _empty_like_orderK(usm_ary, target_dtype)
+    else:
         R = dpt.usm_ndarray(
             usm_ary.shape,
             dtype=target_dtype,
-            buffer=R.usm_data,
-            strides=new_strides,
+            buffer=usm_ary.usm_type,
+            order=copy_order,
+            buffer_ctor_kwargs={"queue": usm_ary.sycl_queue},
         )
     _copy_from_usm_ndarray_to_usm_ndarray(R, usm_ary)
     return R
@@ -492,6 +596,8 @@ def _extract_impl(ary, ary_mask, axis=0):
     dst = dpt.empty(
         dst_shape, dtype=ary.dtype, usm_type=ary.usm_type, device=ary.device
     )
+    if dst.size == 0:
+        return dst
     hev, _ = ti._extract(
         src=ary,
         cumsum=cumsum,
@@ -517,7 +623,7 @@ def _nonzero_impl(ary):
         mask_nelems, dtype=cumsum_dt, sycl_queue=exec_q, order="C"
     )
     mask_count = ti.mask_positions(ary, cumsum, sycl_queue=exec_q)
-    indexes_dt = ti.default_device_int_type(exec_q.sycl_device)
+    indexes_dt = ti.default_device_index_type(exec_q.sycl_device)
     indexes = dpt.empty(
         (ary.ndim, mask_count),
         dtype=indexes_dt,
diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py
index 78c79fb2ad..f924ee31cd 100644
--- a/dpctl/tensor/_elementwise_common.py
+++ b/dpctl/tensor/_elementwise_common.py
@@ -26,10 +26,9 @@
 from dpctl.tensor._usmarray import _is_object_with_buffer_protocol as _is_buffer
 from dpctl.utils import ExecutionPlacementError
 
+from ._copy_utils import _empty_like_orderK, _empty_like_pair_orderK
 from ._type_utils import (
     _acceptance_fn_default,
-    _empty_like_orderK,
-    _empty_like_pair_orderK,
     _find_buf_dtype,
     _find_buf_dtype2,
     _find_inplace_dtype,
diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py
index 0ebe874df2..fe85a183ba 100644
--- a/dpctl/tensor/_elementwise_funcs.py
+++ b/dpctl/tensor/_elementwise_funcs.py
@@ -1176,6 +1176,66 @@
     _logical_xor_docstring_,
 )
 
+# B??: ==== MAXIMUM    (x1, x2)
+_maximum_docstring_ = """
+maximum(x1, x2, out=None, order='K')
+
+Compares two input arrays `x1` and `x2` and returns
+a new array containing the element-wise maxima.
+
+Args:
+    x1 (usm_ndarray):
+        First input array, expected to have numeric data type.
+    x2 (usm_ndarray):
+        Second input array, also expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the newly output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_narray:
+        An array containing the element-wise maxima. The data type of
+        the returned array is determined by the Type Promotion Rules.
+"""
+maximum = BinaryElementwiseFunc(
+    "maximum",
+    ti._maximum_result_type,
+    ti._maximum,
+    _maximum_docstring_,
+)
+
+# B??: ==== MINIMUM    (x1, x2)
+_minimum_docstring_ = """
+minimum(x1, x2, out=None, order='K')
+
+Compares two input arrays `x1` and `x2` and returns
+a new array containing the element-wise minima.
+
+Args:
+    x1 (usm_ndarray):
+        First input array, expected to have numeric data type.
+    x2 (usm_ndarray):
+        Second input array, also expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the newly output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_narray:
+        An array containing the element-wise minima. The data type of
+        the returned array is determined by the Type Promotion Rules.
+"""
+minimum = BinaryElementwiseFunc(
+    "minimum",
+    ti._minimum_result_type,
+    ti._minimum,
+    _minimum_docstring_,
+)
+
 # B19: ==== MULTIPLY    (x1, x2)
 _multiply_docstring_ = """
 multiply(x1, x2, out=None, order='K')
@@ -1369,6 +1429,12 @@
         First input array, expected to have a real-valued data type.
     x2 (usm_ndarray):
         Second input array, also expected to have a real-valued data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the newly output array, if parameter `out` is `None`.
+        Default: "K".
 Returns:
     usm_ndarray:
         an array containing the element-wise remainders. The data type of
diff --git a/dpctl/tensor/_indexing_functions.py b/dpctl/tensor/_indexing_functions.py
index d73a2e7952..188f3cd291 100644
--- a/dpctl/tensor/_indexing_functions.py
+++ b/dpctl/tensor/_indexing_functions.py
@@ -343,5 +343,5 @@ def nonzero(arr):
             "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}"
         )
     if arr.ndim == 0:
-        raise ValueError("Array of positive rank is exepcted")
+        raise ValueError("Array of positive rank is expected")
     return _nonzero_impl(arr)
diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py
index 7b066417af..9406e386af 100644
--- a/dpctl/tensor/_manipulation_functions.py
+++ b/dpctl/tensor/_manipulation_functions.py
@@ -25,6 +25,9 @@
 import dpctl.tensor._tensor_impl as ti
 import dpctl.utils as dputils
 
+from ._copy_utils import _broadcast_strides
+from ._type_utils import _to_device_supported_dtype
+
 __doc__ = (
     "Implementation module for array manipulation "
     "functions in :module:`dpctl.tensor`"
@@ -118,23 +121,6 @@ def __repr__(self):
         return self._finfo.__repr__()
 
 
-def _broadcast_strides(X_shape, X_strides, res_ndim):
-    """
-    Broadcasts strides to match the given dimensions;
-    returns tuple type strides.
-    """
-    out_strides = [0] * res_ndim
-    X_shape_len = len(X_shape)
-    str_dim = -X_shape_len
-    for i in range(X_shape_len):
-        shape_value = X_shape[i]
-        if not shape_value == 1:
-            out_strides[str_dim] = X_strides[i]
-        str_dim += 1
-
-    return tuple(out_strides)
-
-
 def _broadcast_shape_impl(shapes):
     if len(set(shapes)) == 1:
         return shapes[0]
@@ -504,8 +490,10 @@ def _arrays_validation(arrays, check_ndim=True):
     _supported_dtype(Xi.dtype for Xi in arrays)
 
     res_dtype = X0.dtype
+    dev = exec_q.sycl_device
     for i in range(1, n):
         res_dtype = np.promote_types(res_dtype, arrays[i])
+        res_dtype = _to_device_supported_dtype(res_dtype, dev)
 
     if check_ndim:
         for i in range(1, n):
@@ -554,8 +542,13 @@ def _concat_axis_None(arrays):
                 sycl_queue=exec_q,
             )
         else:
+            src_ = array
+            # _copy_usm_ndarray_for_reshape requires src and dst to have
+            # the same data type
+            if not array.dtype == res_dtype:
+                src_ = dpt.astype(src_, res_dtype)
             hev, _ = ti._copy_usm_ndarray_for_reshape(
-                src=array,
+                src=src_,
                 dst=res[fill_start:fill_end],
                 shift=0,
                 sycl_queue=exec_q,
diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi
index 361dd906c3..4f45d57391 100644
--- a/dpctl/tensor/_slicing.pxi
+++ b/dpctl/tensor/_slicing.pxi
@@ -33,9 +33,13 @@ cdef Py_ssize_t _slice_len(
     if sl_start == sl_stop:
         return 0
     if sl_step > 0:
+        if sl_start > sl_stop:
+            return 0
         # 1 + argmax k such htat sl_start + sl_step*k < sl_stop
         return 1 + ((sl_stop - sl_start - 1) // sl_step)
     else:
+        if sl_start < sl_stop:
+            return 0
         return 1 + ((sl_stop - sl_start + 1) // sl_step)
 
 
@@ -221,6 +225,9 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int):
                 k_new = k + ellipses_count
                 new_shape.extend(shape[k:k_new])
                 new_strides.extend(strides[k:k_new])
+                if any(dim == 0 for dim in shape[k:k_new]):
+                    is_empty = True
+                    new_offset = offset
                 k = k_new
             elif ind_i is None:
                 new_shape.append(1)
@@ -236,6 +243,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int):
                     new_offset = new_offset + sl_start * strides[k]
                 if sh_i == 0:
                     is_empty = True
+                    new_offset = offset
                 k = k_new
             elif _is_boolean(ind_i):
                 new_shape.append(1 if ind_i else 0)
diff --git a/dpctl/tensor/_stride_utils.pxi b/dpctl/tensor/_stride_utils.pxi
index 896c31e65a..bc7349a5e6 100644
--- a/dpctl/tensor/_stride_utils.pxi
+++ b/dpctl/tensor/_stride_utils.pxi
@@ -64,6 +64,7 @@ cdef int _from_input_shape_strides(
     cdef int j
     cdef bint all_incr = 1
     cdef bint all_decr = 1
+    cdef bint strides_inspected = 0
     cdef Py_ssize_t elem_count = 1
     cdef Py_ssize_t min_shift = 0
     cdef Py_ssize_t max_shift = 0
@@ -165,6 +166,7 @@ cdef int _from_input_shape_strides(
                 while (j < nd and shape_arr[j] == 1):
                     j = j + 1
                 if j < nd:
+                    strides_inspected = 1
                     if all_incr:
                         all_incr = (
                             (strides_arr[i] > 0) and
@@ -179,7 +181,18 @@ cdef int _from_input_shape_strides(
                         )
                     i = j
                 else:
+                    if not strides_inspected:
+                        # all dimensions have size 1 except
+                        # dimension 'i'. Array is both C and F
+                        # contiguous
+                        strides_inspected = 1
+                        all_incr = (strides_arr[i] == 1)
+                        all_decr = all_incr
                     break
+            # should only set contig flags on actually obtained
+            # values, rather than default values
+            all_incr = all_incr and strides_inspected
+            all_decr = all_decr and strides_inspected
             if all_incr and all_decr:
                 contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
             elif all_incr:
diff --git a/dpctl/tensor/_type_utils.py b/dpctl/tensor/_type_utils.py
index fb2223f292..b576764689 100644
--- a/dpctl/tensor/_type_utils.py
+++ b/dpctl/tensor/_type_utils.py
@@ -14,8 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import builtins
-
 import dpctl.tensor as dpt
 import dpctl.tensor._tensor_impl as ti
 
@@ -116,96 +114,6 @@ def _can_cast(from_: dpt.dtype, to_: dpt.dtype, _fp16: bool, _fp64: bool):
     return can_cast_v
 
 
-def _empty_like_orderK(X, dt, usm_type=None, dev=None):
-    """Returns empty array like `x`, using order='K'
-
-    For an array `x` that was obtained by permutation of a contiguous
-    array the returned array will have the same shape and the same
-    strides as `x`.
-    """
-    if not isinstance(X, dpt.usm_ndarray):
-        raise TypeError(f"Expected usm_ndarray, got {type(X)}")
-    if usm_type is None:
-        usm_type = X.usm_type
-    if dev is None:
-        dev = X.device
-    fl = X.flags
-    if fl["C"] or X.size <= 1:
-        return dpt.empty_like(
-            X, dtype=dt, usm_type=usm_type, device=dev, order="C"
-        )
-    elif fl["F"]:
-        return dpt.empty_like(
-            X, dtype=dt, usm_type=usm_type, device=dev, order="F"
-        )
-    st = list(X.strides)
-    perm = sorted(
-        range(X.ndim), key=lambda d: builtins.abs(st[d]), reverse=True
-    )
-    inv_perm = sorted(range(X.ndim), key=lambda i: perm[i])
-    st_sorted = [st[i] for i in perm]
-    sh = X.shape
-    sh_sorted = tuple(sh[i] for i in perm)
-    R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C")
-    if min(st_sorted) < 0:
-        sl = tuple(
-            slice(None, None, -1)
-            if st_sorted[i] < 0
-            else slice(None, None, None)
-            for i in range(X.ndim)
-        )
-        R = R[sl]
-    return dpt.permute_dims(R, inv_perm)
-
-
-def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev):
-    if not isinstance(X1, dpt.usm_ndarray):
-        raise TypeError(f"Expected usm_ndarray, got {type(X1)}")
-    if not isinstance(X2, dpt.usm_ndarray):
-        raise TypeError(f"Expected usm_ndarray, got {type(X2)}")
-    nd1 = X1.ndim
-    nd2 = X2.ndim
-    if nd1 > nd2 and X1.shape == res_shape:
-        return _empty_like_orderK(X1, dt, usm_type, dev)
-    elif nd1 < nd2 and X2.shape == res_shape:
-        return _empty_like_orderK(X2, dt, usm_type, dev)
-    fl1 = X1.flags
-    fl2 = X2.flags
-    if fl1["C"] or fl2["C"]:
-        return dpt.empty(
-            res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C"
-        )
-    if fl1["F"] and fl2["F"]:
-        return dpt.empty(
-            res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F"
-        )
-    st1 = list(X1.strides)
-    st2 = list(X2.strides)
-    max_ndim = max(nd1, nd2)
-    st1 += [0] * (max_ndim - len(st1))
-    st2 += [0] * (max_ndim - len(st2))
-    perm = sorted(
-        range(max_ndim),
-        key=lambda d: (builtins.abs(st1[d]), builtins.abs(st2[d])),
-        reverse=True,
-    )
-    inv_perm = sorted(range(max_ndim), key=lambda i: perm[i])
-    st1_sorted = [st1[i] for i in perm]
-    st2_sorted = [st2[i] for i in perm]
-    sh = res_shape
-    sh_sorted = tuple(sh[i] for i in perm)
-    R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C")
-    if max(min(st1_sorted), min(st2_sorted)) < 0:
-        sl = tuple(
-            slice(None, None, -1)
-            if (st1_sorted[i] < 0 and st2_sorted[i] < 0)
-            else slice(None, None, None)
-            for i in range(nd1)
-        )
-        R = R[sl]
-    return dpt.permute_dims(R, inv_perm)
-
-
 def _to_device_supported_dtype(dt, dev):
     has_fp16 = dev.has_aspect_fp16
     has_fp64 = dev.has_aspect_fp64
@@ -339,8 +247,6 @@ def _find_inplace_dtype(lhs_dtype, rhs_dtype, query_fn, sycl_dev):
     "_find_buf_dtype",
     "_find_buf_dtype2",
     "_find_inplace_dtype",
-    "_empty_like_orderK",
-    "_empty_like_pair_orderK",
     "_to_device_supported_dtype",
     "_acceptance_fn_default",
     "_acceptance_fn_divide",
diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx
index 5b1bd5f6a3..98986d84d6 100644
--- a/dpctl/tensor/_usmarray.pyx
+++ b/dpctl/tensor/_usmarray.pyx
@@ -764,6 +764,8 @@ cdef class usm_ndarray:
             ind, (<object>self).shape, (<object> self).strides,
             self.get_offset())
         cdef usm_ndarray res
+        cdef int i = 0
+        cdef bint matching = 1
 
         if len(_meta) < 5:
             raise RuntimeError
@@ -787,7 +789,20 @@ cdef class usm_ndarray:
 
         from ._copy_utils import _extract_impl, _nonzero_impl, _take_multi_index
         if len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool:
-            return _extract_impl(res, adv_ind[0], axis=adv_ind_start_p)
+            key_ = adv_ind[0]
+            adv_ind_end_p = key_.ndim + adv_ind_start_p
+            if adv_ind_end_p > res.ndim:
+                raise IndexError("too many indices for the array")
+            key_shape = key_.shape
+            arr_shape = res.shape[adv_ind_start_p:adv_ind_end_p]
+            for i in range(key_.ndim):
+                if matching:
+                    if not key_shape[i] == arr_shape[i] and key_shape[i] > 0:
+                        matching = 0
+            if not matching:
+                raise IndexError("boolean index did not match indexed array in dimensions")
+            res = _extract_impl(res, key_, axis=adv_ind_start_p)
+            return res
 
         if any(ind.dtype == dpt_bool for ind in adv_ind):
             adv_ind_int = list()
@@ -801,7 +816,7 @@ cdef class usm_ndarray:
         return _take_multi_index(res, adv_ind, adv_ind_start_p)
 
 
-    def to_device(self, target):
+    def to_device(self, target, stream=None):
         """ to_device(target_device)
 
         Transfers this array to specified target device.
@@ -841,6 +856,14 @@ cdef class usm_ndarray:
         cdef c_dpctl.DPCTLSyclQueueRef QRef = NULL
         cdef c_dpmem._Memory arr_buf
         d = Device.create_device(target)
+
+        if (stream is None or type(stream) is not dpctl.SyclQueue or
+            stream == self.sycl_queue):
+            pass
+        else:
+            ev = self.sycl_queue.submit_barrier()
+            stream.submit_barrier(dependent_events=[ev])
+
         if (d.sycl_context == self.sycl_context):
             arr_buf = <c_dpmem._Memory> self.usm_data
             QRef = (<c_dpctl.SyclQueue> d.sycl_queue).get_queue_ref()
@@ -857,7 +880,7 @@ cdef class usm_ndarray:
                 strides=self.strides,
                 offset=self.get_offset()
             )
-            res.flags_ = self.flags.flags
+            res.flags_ = self.flags_
             return res
         else:
             nbytes = self.usm_data.nbytes
@@ -872,7 +895,7 @@ cdef class usm_ndarray:
                 strides=self.strides,
                 offset=self.get_offset()
             )
-            res.flags_ = self.flags.flags
+            res.flags_ = self.flags_
             return res
 
     def _set_namespace(self, mod):
@@ -884,12 +907,14 @@ cdef class usm_ndarray:
         Returns array namespace, member functions of which
         implement data API.
         """
-        return self.array_namespace_
+        return self.array_namespace_ if self.array_namespace_ is not None else dpctl.tensor
 
     def __bool__(self):
         if self.size == 1:
             mem_view = dpmem.as_usm_memory(self)
-            return mem_view.copy_to_host().view(self.dtype).__bool__()
+            host_buf = mem_view.copy_to_host()
+            view = host_buf.view(self.dtype)
+            return view.__bool__()
 
         if self.size == 0:
             raise ValueError(
@@ -898,13 +923,15 @@ cdef class usm_ndarray:
 
         raise ValueError(
             "The truth value of an array with more than one element is "
-            "ambiguous. Use a.any() or a.all()"
+            "ambiguous. Use dpctl.tensor.any() or dpctl.tensor.all()"
         )
 
     def __float__(self):
         if self.size == 1:
             mem_view = dpmem.as_usm_memory(self)
-            return mem_view.copy_to_host().view(self.dtype).__float__()
+            host_buf = mem_view.copy_to_host()
+            view = host_buf.view(self.dtype)
+            return view.__float__()
 
         raise ValueError(
             "only size-1 arrays can be converted to Python scalars"
@@ -913,7 +940,9 @@ cdef class usm_ndarray:
     def __complex__(self):
         if self.size == 1:
             mem_view = dpmem.as_usm_memory(self)
-            return mem_view.copy_to_host().view(self.dtype).__complex__()
+            host_buf = mem_view.copy_to_host()
+            view = host_buf.view(self.dtype)
+            return view.__complex__()
 
         raise ValueError(
             "only size-1 arrays can be converted to Python scalars"
@@ -922,7 +951,9 @@ cdef class usm_ndarray:
     def __int__(self):
         if self.size == 1:
             mem_view = dpmem.as_usm_memory(self)
-            return mem_view.copy_to_host().view(self.dtype).__int__()
+            host_buf = mem_view.copy_to_host()
+            view = host_buf.view(self.dtype)
+            return view.__int__()
 
         raise ValueError(
             "only size-1 arrays can be converted to Python scalars"
@@ -957,9 +988,9 @@ cdef class usm_ndarray:
     def __and__(first, other):
         "See comment in __add__"
         if isinstance(first, usm_ndarray):
-            return _dispatch_binary_elementwise(first, "logical_and", other)
+            return _dispatch_binary_elementwise(first, "bitwise_and", other)
         elif isinstance(other, usm_ndarray):
-            return _dispatch_binary_elementwise2(first, "logical_and", other)
+            return _dispatch_binary_elementwise2(first, "bitwise_and", other)
         return NotImplemented
 
     def __dlpack__(self, stream=None):
@@ -1023,7 +1054,7 @@ cdef class usm_ndarray:
         return _dispatch_binary_elementwise(self, "greater", other)
 
     def __invert__(self):
-        return _dispatch_unary_elementwise(self, "invert")
+        return _dispatch_unary_elementwise(self, "bitwise_invert")
 
     def __le__(self, other):
         return _dispatch_binary_elementwise(self, "less_equal", other)
@@ -1037,9 +1068,9 @@ cdef class usm_ndarray:
     def __lshift__(first, other):
         "See comment in __add__"
         if isinstance(first, usm_ndarray):
-            return _dispatch_binary_elementwise(first, "left_shift", other)
+            return _dispatch_binary_elementwise(first, "bitwise_left_shift", other)
         elif isinstance(other, usm_ndarray):
-            return _dispatch_binary_elementwise2(first, "left_shift", other)
+            return _dispatch_binary_elementwise2(first, "bitwise_left_shift", other)
         return NotImplemented
 
     def __lt__(self, other):
@@ -1056,9 +1087,9 @@ cdef class usm_ndarray:
     def __mod__(first, other):
         "See comment in __add__"
         if isinstance(first, usm_ndarray):
-            return _dispatch_binary_elementwise(first, "mod", other)
+            return _dispatch_binary_elementwise(first, "remainder", other)
         elif isinstance(other, usm_ndarray):
-            return _dispatch_binary_elementwise2(first, "mod", other)
+            return _dispatch_binary_elementwise2(first, "remainder", other)
         return NotImplemented
 
     def __mul__(first, other):
@@ -1078,9 +1109,9 @@ cdef class usm_ndarray:
     def __or__(first, other):
         "See comment in __add__"
         if isinstance(first, usm_ndarray):
-            return _dispatch_binary_elementwise(first, "logical_or", other)
+            return _dispatch_binary_elementwise(first, "bitwise_or", other)
         elif isinstance(other, usm_ndarray):
-            return _dispatch_binary_elementwise2(first, "logical_or", other)
+            return _dispatch_binary_elementwise2(first, "bitwise_or", other)
         return NotImplemented
 
     def __pos__(self):
@@ -1090,17 +1121,17 @@ cdef class usm_ndarray:
         "See comment in __add__"
         if mod is None:
             if isinstance(first, usm_ndarray):
-                return _dispatch_binary_elementwise(first, "power", other)
+                return _dispatch_binary_elementwise(first, "pow", other)
             elif isinstance(other, usm_ndarray):
-                return _dispatch_binary_elementwise(first, "power", other)
+                return _dispatch_binary_elementwise(first, "pow", other)
         return NotImplemented
 
     def __rshift__(first, other):
         "See comment in __add__"
         if isinstance(first, usm_ndarray):
-            return _dispatch_binary_elementwise(first, "right_shift", other)
+            return _dispatch_binary_elementwise(first, "bitwise_right_shift", other)
         elif isinstance(other, usm_ndarray):
-            return _dispatch_binary_elementwise2(first, "right_shift", other)
+            return _dispatch_binary_elementwise2(first, "bitwise_right_shift", other)
         return NotImplemented
 
     def __setitem__(self, key, rhs):
@@ -1202,57 +1233,57 @@ cdef class usm_ndarray:
     def __truediv__(first, other):
         "See comment in __add__"
         if isinstance(first, usm_ndarray):
-            return _dispatch_binary_elementwise(first, "true_divide", other)
+            return _dispatch_binary_elementwise(first, "divide", other)
         elif isinstance(other, usm_ndarray):
-            return _dispatch_binary_elementwise2(first, "true_divide", other)
+            return _dispatch_binary_elementwise2(first, "divide", other)
         return NotImplemented
 
     def __xor__(first, other):
         "See comment in __add__"
         if isinstance(first, usm_ndarray):
-            return _dispatch_binary_elementwise(first, "logical_xor", other)
+            return _dispatch_binary_elementwise(first, "bitwise_xor", other)
         elif isinstance(other, usm_ndarray):
-            return _dispatch_binary_elementwise2(first, "logical_xor", other)
+            return _dispatch_binary_elementwise2(first, "bitwise_xor", other)
         return NotImplemented
 
     def __radd__(self, other):
         return _dispatch_binary_elementwise(self, "add", other)
 
     def __rand__(self, other):
-        return _dispatch_binary_elementwise(self, "logical_and", other)
+        return _dispatch_binary_elementwise(self, "bitwise_and", other)
 
     def __rfloordiv__(self, other):
         return _dispatch_binary_elementwise2(other, "floor_divide", self)
 
     def __rlshift__(self, other):
-        return _dispatch_binary_elementwise2(other, "left_shift", self)
+        return _dispatch_binary_elementwise2(other, "bitwise_left_shift", self)
 
     def __rmatmul__(self, other):
         return _dispatch_binary_elementwise2(other, "matmul", self)
 
     def __rmod__(self, other):
-        return _dispatch_binary_elementwise2(other, "mod", self)
+        return _dispatch_binary_elementwise2(other, "remainder", self)
 
     def __rmul__(self, other):
         return _dispatch_binary_elementwise(self, "multiply", other)
 
     def __ror__(self, other):
-        return _dispatch_binary_elementwise(self, "logical_or", other)
+        return _dispatch_binary_elementwise(self, "bitwise_or", other)
 
     def __rpow__(self, other):
-        return _dispatch_binary_elementwise2(other, "power", self)
+        return _dispatch_binary_elementwise2(other, "pow", self)
 
     def __rrshift__(self, other):
-        return _dispatch_binary_elementwise2(other, "right_shift", self)
+        return _dispatch_binary_elementwise2(other, "bitwise_right_shift", self)
 
     def __rsub__(self, other):
         return _dispatch_binary_elementwise2(other, "subtract", self)
 
     def __rtruediv__(self, other):
-        return _dispatch_binary_elementwise2(other, "true_divide", self)
+        return _dispatch_binary_elementwise2(other, "divide", self)
 
     def __rxor__(self, other):
-        return _dispatch_binary_elementwise2(other, "logical_xor", self)
+        return _dispatch_binary_elementwise2(other, "bitwise_xor", self)
 
     def __iadd__(self, other):
         from ._elementwise_funcs import add
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp
index 7b59285884..b996a6d0ec 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp
@@ -28,6 +28,7 @@
 #include <cmath>
 #include <cstddef>
 #include <cstdint>
+#include <limits>
 #include <type_traits>
 
 #include "kernels/elementwise_functions/common.hpp"
@@ -73,6 +74,46 @@ template <typename argT, typename resT> struct Expm1Functor
             const realT x = std::real(in);
             const realT y = std::imag(in);
 
+            // special cases
+            if (std::isinf(x)) {
+                if (x > realT(0)) {
+                    // positive infinity cases
+                    if (!std::isfinite(y)) {
+                        return resT{x, std::numeric_limits<realT>::quiet_NaN()};
+                    }
+                    else if (y == realT(0)) {
+                        return in;
+                    }
+                    else {
+                        return (resT{std::copysign(x, std::cos(y)),
+                                     std::copysign(x, std::sin(y))});
+                    }
+                }
+                else {
+                    // negative infinity cases
+                    if (!std::isfinite(y)) {
+                        // copy sign of y to guarantee
+                        // conj(expm1(x)) == expm1(conj(x))
+                        return resT{realT(-1), std::copysign(realT(0), y)};
+                    }
+                    else {
+                        return resT{realT(-1),
+                                    std::copysign(realT(0), std::sin(y))};
+                    }
+                }
+            }
+
+            if (std::isnan(x)) {
+                if (y == realT(0)) {
+                    return in;
+                }
+                else {
+                    return resT{std::numeric_limits<realT>::quiet_NaN(),
+                                std::numeric_limits<realT>::quiet_NaN()};
+                }
+            }
+
+            // x, y finite numbers
             realT cosY_val;
             const realT sinY_val = sycl::sincos(y, &cosY_val);
             const realT sinhalfY_val = std::sin(y / 2);
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp
index d1db67f116..ab2ce9b5c3 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp
@@ -30,6 +30,7 @@
 #include <cstdint>
 #include <type_traits>
 
+#include "utils/math_utils.hpp"
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
@@ -67,12 +68,8 @@ template <typename argT1, typename argT2, typename resT> struct GreaterFunctor
                       tu_ns::is_complex<argT2>::value)
         {
             static_assert(std::is_same_v<argT1, argT2>);
-            using realT = typename argT1::value_type;
-            realT real1 = std::real(in1);
-            realT real2 = std::real(in2);
-
-            return (real1 == real2) ? (std::imag(in1) > std::imag(in2))
-                                    : real1 > real2;
+            using dpctl::tensor::math_utils::greater_complex;
+            return greater_complex<argT1>(in1, in2);
         }
         else {
             return (in1 > in2);
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp
index 9b6fac1214..fa2ece210f 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp
@@ -30,6 +30,7 @@
 #include <cstdint>
 #include <type_traits>
 
+#include "utils/math_utils.hpp"
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
@@ -68,12 +69,8 @@ struct GreaterEqualFunctor
                       tu_ns::is_complex<argT2>::value)
         {
             static_assert(std::is_same_v<argT1, argT2>);
-            using realT = typename argT1::value_type;
-            realT real1 = std::real(in1);
-            realT real2 = std::real(in2);
-
-            return (real1 == real2) ? (std::imag(in1) >= std::imag(in2))
-                                    : real1 >= real2;
+            using dpctl::tensor::math_utils::greater_equal_complex;
+            return greater_equal_complex<argT1>(in1, in2);
         }
         else {
             return (in1 >= in2);
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp
index 4829c30f54..36675a2e19 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp
@@ -29,6 +29,7 @@
 #include <cstdint>
 #include <type_traits>
 
+#include "utils/math_utils.hpp"
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
@@ -66,12 +67,8 @@ template <typename argT1, typename argT2, typename resT> struct LessFunctor
                       tu_ns::is_complex<argT2>::value)
         {
             static_assert(std::is_same_v<argT1, argT2>);
-            using realT = typename argT1::value_type;
-            realT real1 = std::real(in1);
-            realT real2 = std::real(in2);
-
-            return (real1 == real2) ? (std::imag(in1) < std::imag(in2))
-                                    : real1 < real2;
+            using dpctl::tensor::math_utils::less_complex;
+            return less_complex<argT1>(in1, in2);
         }
         else {
             return (in1 < in2);
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp
index 31f5c0b4ba..9b740219fc 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp
@@ -30,6 +30,7 @@
 #include <cstdint>
 #include <type_traits>
 
+#include "utils/math_utils.hpp"
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
@@ -67,12 +68,8 @@ template <typename argT1, typename argT2, typename resT> struct LessEqualFunctor
                       tu_ns::is_complex<argT2>::value)
         {
             static_assert(std::is_same_v<argT1, argT2>);
-            using realT = typename argT1::value_type;
-            realT real1 = std::real(in1);
-            realT real2 = std::real(in2);
-
-            return (real1 == real2) ? (std::imag(in1) <= std::imag(in2))
-                                    : real1 <= real2;
+            using dpctl::tensor::math_utils::less_equal_complex;
+            return less_equal_complex<argT1>(in1, in2);
         }
         else {
             return (in1 <= in2);
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp
index 39ba5c3fcf..b718a5f991 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp
@@ -28,6 +28,7 @@
 #include <CL/sycl.hpp>
 #include <cstddef>
 #include <cstdint>
+#include <limits>
 #include <type_traits>
 
 #include "utils/offset_utils.hpp"
@@ -55,16 +56,12 @@ using dpctl::tensor::type_utils::vec_cast;
 
 template <typename argT1, typename argT2, typename resT> struct LogAddExpFunctor
 {
-    using supports_sg_loadstore = typename std::negation<
-        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
-    using supports_vec = typename std::negation<
-        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_sg_loadstore = std::true_type;
+    using supports_vec = std::true_type;
 
     resT operator()(const argT1 &in1, const argT2 &in2)
     {
-        resT max = std::max<resT>(in1, in2);
-        resT min = std::min<resT>(in1, in2);
-        return max + std::log1p(std::exp(min - max));
+        return impl<resT>(in1, in2);
     }
 
     template <int vec_sz>
@@ -72,16 +69,48 @@ template <typename argT1, typename argT2, typename resT> struct LogAddExpFunctor
                                        const sycl::vec<argT2, vec_sz> &in2)
     {
         sycl::vec<resT, vec_sz> res;
-        auto diff = in1 - in2;
+        auto diff = in1 - in2; // take advantange of faster vec arithmetic
 
 #pragma unroll
         for (int i = 0; i < vec_sz; ++i) {
-            resT max = std::max<resT>(in1[i], in2[i]);
-            res[i] = max + std::log1p(std::exp(std::abs(diff[i])));
+            if (std::isfinite(diff[i])) {
+                res[i] = std::max<resT>(in1[i], in2[i]) +
+                         impl_finite<resT>(-std::abs(diff[i]));
+            }
+            else {
+                res[i] = impl<resT>(in1[i], in2[i]);
+            }
         }
 
         return res;
     }
+
+private:
+    template <typename T> T impl(T const &in1, T const &in2)
+    {
+        if (in1 == in2) { // handle signed infinities
+            const T log2 = std::log(T(2));
+            return in1 + log2;
+        }
+        else {
+            const T tmp = in1 - in2;
+            if (tmp > 0) {
+                return in1 + std::log1p(std::exp(-tmp));
+            }
+            else if (tmp <= 0) {
+                return in2 + std::log1p(std::exp(tmp));
+            }
+            else {
+                return std::numeric_limits<T>::quiet_NaN();
+            }
+        }
+    }
+
+    template <typename T> T impl_finite(T const &in)
+    {
+        return (in > 0) ? (in + std::log1p(std::exp(-in)))
+                        : std::log1p(std::exp(in));
+    }
 };
 
 template <typename argT1,
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp
new file mode 100644
index 0000000000..60afc2fc15
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp
@@ -0,0 +1,279 @@
+//=== maximum.hpp -   Binary function MAXIMUM            ------  *-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of MAXIMUM(x1, x2)
+/// function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "utils/math_utils.hpp"
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace maximum
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace tu_ns = dpctl::tensor::type_utils;
+
+template <typename argT1, typename argT2, typename resT> struct MaximumFunctor
+{
+
+    using supports_sg_loadstore = std::negation<
+        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_vec = std::conjunction<
+        std::is_same<argT1, argT2>,
+        std::negation<std::disjunction<tu_ns::is_complex<argT1>,
+                                       tu_ns::is_complex<argT2>>>>;
+
+    resT operator()(const argT1 &in1, const argT2 &in2)
+    {
+        if constexpr (tu_ns::is_complex<argT1>::value ||
+                      tu_ns::is_complex<argT2>::value)
+        {
+            static_assert(std::is_same_v<argT1, argT2>);
+            using dpctl::tensor::math_utils::max_complex;
+            return max_complex<argT1>(in1, in2);
+        }
+        else if constexpr (std::is_floating_point_v<argT1> ||
+                           std::is_same_v<argT1, sycl::half>)
+            return (std::isnan(in1) || in1 > in2) ? in1 : in2;
+        else
+            return (in1 > in2) ? in1 : in2;
+    }
+
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT1, vec_sz> &in1,
+                                       const sycl::vec<argT2, vec_sz> &in2)
+    {
+        sycl::vec<resT, vec_sz> res;
+#pragma unroll
+        for (int i = 0; i < vec_sz; ++i) {
+            if constexpr (std::is_floating_point_v<argT1>)
+                res[i] =
+                    (sycl::isnan(in1[i]) || in1[i] > in2[i]) ? in1[i] : in2[i];
+            else
+                res[i] = (in1[i] > in2[i]) ? in1[i] : in2[i];
+        }
+        return res;
+    }
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using MaximumContigFunctor =
+    elementwise_common::BinaryContigFunctor<argT1,
+                                            argT2,
+                                            resT,
+                                            MaximumFunctor<argT1, argT2, resT>,
+                                            vec_sz,
+                                            n_vecs>;
+
+template <typename argT1, typename argT2, typename resT, typename IndexerT>
+using MaximumStridedFunctor = elementwise_common::BinaryStridedFunctor<
+    argT1,
+    argT2,
+    resT,
+    IndexerT,
+    MaximumFunctor<argT1, argT2, resT>>;
+
+template <typename T1, typename T2> struct MaximumOutputType
+{
+    using value_type = typename std::disjunction< // disjunction is C++17
+                                                  // feature, supported by DPC++
+        td_ns::BinaryTypeMapResultEntry<T1, bool, T2, bool, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint8_t,
+                                        T2,
+                                        std::uint8_t,
+                                        std::uint8_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int8_t,
+                                        T2,
+                                        std::int8_t,
+                                        std::int8_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint16_t,
+                                        T2,
+                                        std::uint16_t,
+                                        std::uint16_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int16_t,
+                                        T2,
+                                        std::int16_t,
+                                        std::int16_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint32_t,
+                                        T2,
+                                        std::uint32_t,
+                                        std::uint32_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int32_t,
+                                        T2,
+                                        std::int32_t,
+                                        std::int32_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint64_t,
+                                        T2,
+                                        std::uint64_t,
+                                        std::uint64_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int64_t,
+                                        T2,
+                                        std::int64_t,
+                                        std::int64_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        sycl::half,
+                                        T2,
+                                        sycl::half,
+                                        sycl::half>,
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, float>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, double>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        std::complex<float>>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz,
+          unsigned int n_vecs>
+class maximum_contig_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event maximum_contig_impl(sycl::queue exec_q,
+                                size_t nelems,
+                                const char *arg1_p,
+                                py::ssize_t arg1_offset,
+                                const char *arg2_p,
+                                py::ssize_t arg2_offset,
+                                char *res_p,
+                                py::ssize_t res_offset,
+                                const std::vector<sycl::event> &depends = {})
+{
+    return elementwise_common::binary_contig_impl<
+        argTy1, argTy2, MaximumOutputType, MaximumContigFunctor,
+        maximum_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p,
+                               arg2_offset, res_p, res_offset, depends);
+}
+
+template <typename fnT, typename T1, typename T2> struct MaximumContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename MaximumOutputType<T1, T2>::value_type, void>)
+        {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = maximum_contig_impl<T1, T2>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T1, typename T2> struct MaximumTypeMapFactory
+{
+    /*! @brief get typeid for output type of maximum(T1 x, T2 y) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename MaximumOutputType<T1, T2>::value_type;
+        ;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename resT, typename IndexerT>
+class maximum_strided_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event
+maximum_strided_impl(sycl::queue exec_q,
+                     size_t nelems,
+                     int nd,
+                     const py::ssize_t *shape_and_strides,
+                     const char *arg1_p,
+                     py::ssize_t arg1_offset,
+                     const char *arg2_p,
+                     py::ssize_t arg2_offset,
+                     char *res_p,
+                     py::ssize_t res_offset,
+                     const std::vector<sycl::event> &depends,
+                     const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::binary_strided_impl<
+        argTy1, argTy2, MaximumOutputType, MaximumStridedFunctor,
+        maximum_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p,
+                                arg1_offset, arg2_p, arg2_offset, res_p,
+                                res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T1, typename T2> struct MaximumStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename MaximumOutputType<T1, T2>::value_type, void>)
+        {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = maximum_strided_impl<T1, T2>;
+            return fn;
+        }
+    }
+};
+
+} // namespace maximum
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp
new file mode 100644
index 0000000000..d19c829a6c
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp
@@ -0,0 +1,279 @@
+//=== minimum.hpp -   Binary function MINIMUM           ------  *-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of MINIMUM(x1, x2)
+/// function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "utils/math_utils.hpp"
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace minimum
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace tu_ns = dpctl::tensor::type_utils;
+
+template <typename argT1, typename argT2, typename resT> struct MinimumFunctor
+{
+
+    using supports_sg_loadstore = std::negation<
+        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_vec = std::conjunction<
+        std::is_same<argT1, argT2>,
+        std::negation<std::disjunction<tu_ns::is_complex<argT1>,
+                                       tu_ns::is_complex<argT2>>>>;
+
+    resT operator()(const argT1 &in1, const argT2 &in2)
+    {
+        if constexpr (tu_ns::is_complex<argT1>::value ||
+                      tu_ns::is_complex<argT2>::value)
+        {
+            static_assert(std::is_same_v<argT1, argT2>);
+            using dpctl::tensor::math_utils::min_complex;
+            return min_complex<argT1>(in1, in2);
+        }
+        else if constexpr (std::is_floating_point_v<argT1> ||
+                           std::is_same_v<argT1, sycl::half>)
+            return (std::isnan(in1) || in1 < in2) ? in1 : in2;
+        else
+            return (in1 < in2) ? in1 : in2;
+    }
+
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT1, vec_sz> &in1,
+                                       const sycl::vec<argT2, vec_sz> &in2)
+    {
+        sycl::vec<resT, vec_sz> res;
+#pragma unroll
+        for (int i = 0; i < vec_sz; ++i) {
+            if constexpr (std::is_floating_point_v<argT1>)
+                res[i] =
+                    (sycl::isnan(in1[i]) || in1[i] < in2[i]) ? in1[i] : in2[i];
+            else
+                res[i] = (in1[i] < in2[i]) ? in1[i] : in2[i];
+        }
+        return res;
+    }
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using MinimumContigFunctor =
+    elementwise_common::BinaryContigFunctor<argT1,
+                                            argT2,
+                                            resT,
+                                            MinimumFunctor<argT1, argT2, resT>,
+                                            vec_sz,
+                                            n_vecs>;
+
+template <typename argT1, typename argT2, typename resT, typename IndexerT>
+using MinimumStridedFunctor = elementwise_common::BinaryStridedFunctor<
+    argT1,
+    argT2,
+    resT,
+    IndexerT,
+    MinimumFunctor<argT1, argT2, resT>>;
+
+template <typename T1, typename T2> struct MinimumOutputType
+{
+    using value_type = typename std::disjunction< // disjunction is C++17
+                                                  // feature, supported by DPC++
+        td_ns::BinaryTypeMapResultEntry<T1, bool, T2, bool, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint8_t,
+                                        T2,
+                                        std::uint8_t,
+                                        std::uint8_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int8_t,
+                                        T2,
+                                        std::int8_t,
+                                        std::int8_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint16_t,
+                                        T2,
+                                        std::uint16_t,
+                                        std::uint16_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int16_t,
+                                        T2,
+                                        std::int16_t,
+                                        std::int16_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint32_t,
+                                        T2,
+                                        std::uint32_t,
+                                        std::uint32_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int32_t,
+                                        T2,
+                                        std::int32_t,
+                                        std::int32_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint64_t,
+                                        T2,
+                                        std::uint64_t,
+                                        std::uint64_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int64_t,
+                                        T2,
+                                        std::int64_t,
+                                        std::int64_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        sycl::half,
+                                        T2,
+                                        sycl::half,
+                                        sycl::half>,
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, float>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, double>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        std::complex<float>>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz,
+          unsigned int n_vecs>
+class minimum_contig_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event minimum_contig_impl(sycl::queue exec_q,
+                                size_t nelems,
+                                const char *arg1_p,
+                                py::ssize_t arg1_offset,
+                                const char *arg2_p,
+                                py::ssize_t arg2_offset,
+                                char *res_p,
+                                py::ssize_t res_offset,
+                                const std::vector<sycl::event> &depends = {})
+{
+    return elementwise_common::binary_contig_impl<
+        argTy1, argTy2, MinimumOutputType, MinimumContigFunctor,
+        minimum_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p,
+                               arg2_offset, res_p, res_offset, depends);
+}
+
+template <typename fnT, typename T1, typename T2> struct MinimumContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename MinimumOutputType<T1, T2>::value_type, void>)
+        {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = minimum_contig_impl<T1, T2>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T1, typename T2> struct MinimumTypeMapFactory
+{
+    /*! @brief get typeid for output type of minimum(T1 x, T2 y) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename MinimumOutputType<T1, T2>::value_type;
+        ;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename resT, typename IndexerT>
+class minimum_strided_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event
+minimum_strided_impl(sycl::queue exec_q,
+                     size_t nelems,
+                     int nd,
+                     const py::ssize_t *shape_and_strides,
+                     const char *arg1_p,
+                     py::ssize_t arg1_offset,
+                     const char *arg2_p,
+                     py::ssize_t arg2_offset,
+                     char *res_p,
+                     py::ssize_t res_offset,
+                     const std::vector<sycl::event> &depends,
+                     const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::binary_strided_impl<
+        argTy1, argTy2, MinimumOutputType, MinimumStridedFunctor,
+        minimum_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p,
+                                arg1_offset, arg2_p, arg2_offset, res_p,
+                                res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T1, typename T2> struct MinimumStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename MinimumOutputType<T1, T2>::value_type, void>)
+        {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = minimum_strided_impl<T1, T2>;
+            return fn;
+        }
+    }
+};
+
+} // namespace minimum
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/include/utils/math_utils.hpp b/dpctl/tensor/libtensor/include/utils/math_utils.hpp
new file mode 100644
index 0000000000..d724e03e35
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/utils/math_utils.hpp
@@ -0,0 +1,120 @@
+//===------- math_utils.hpp - Implementation of math utils  -------*-C++-*/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines math utility functions.
+//===----------------------------------------------------------------------===//
+
+#pragma once
+#include <cmath>
+#include <complex>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace math_utils
+{
+
+template <typename T> bool less_complex(const T &x1, const T &x2)
+{
+    using realT = typename T::value_type;
+    realT real1 = std::real(x1);
+    realT real2 = std::real(x2);
+    realT imag1 = std::imag(x1);
+    realT imag2 = std::imag(x2);
+
+    return (real1 == real2)
+               ? (imag1 < imag2)
+               : (real1 < real2 && !std::isnan(imag1) && !std::isnan(imag2));
+}
+
+template <typename T> bool greater_complex(const T &x1, const T &x2)
+{
+    using realT = typename T::value_type;
+    realT real1 = std::real(x1);
+    realT real2 = std::real(x2);
+    realT imag1 = std::imag(x1);
+    realT imag2 = std::imag(x2);
+
+    return (real1 == real2)
+               ? (imag1 > imag2)
+               : (real1 > real2 && !std::isnan(imag1) && !std::isnan(imag2));
+}
+
+template <typename T> bool less_equal_complex(const T &x1, const T &x2)
+{
+    using realT = typename T::value_type;
+    realT real1 = std::real(x1);
+    realT real2 = std::real(x2);
+    realT imag1 = std::imag(x1);
+    realT imag2 = std::imag(x2);
+
+    return (real1 == real2)
+               ? (imag1 <= imag2)
+               : (real1 < real2 && !std::isnan(imag1) && !std::isnan(imag2));
+}
+
+template <typename T> bool greater_equal_complex(const T &x1, const T &x2)
+{
+    using realT = typename T::value_type;
+    realT real1 = std::real(x1);
+    realT real2 = std::real(x2);
+    realT imag1 = std::imag(x1);
+    realT imag2 = std::imag(x2);
+
+    return (real1 == real2)
+               ? (imag1 >= imag2)
+               : (real1 > real2 && !std::isnan(imag1) && !std::isnan(imag2));
+}
+
+template <typename T> T max_complex(const T &x1, const T &x2)
+{
+    using realT = typename T::value_type;
+    realT real1 = std::real(x1);
+    realT real2 = std::real(x2);
+    realT imag1 = std::imag(x1);
+    realT imag2 = std::imag(x2);
+
+    bool isnan_imag1 = std::isnan(imag1);
+    bool gt = (real1 == real2)
+                  ? (imag1 > imag2)
+                  : (real1 > real2 && !isnan_imag1 && !std::isnan(imag2));
+    return (std::isnan(real1) || isnan_imag1 || gt) ? x1 : x2;
+}
+
+template <typename T> T min_complex(const T &x1, const T &x2)
+{
+    using realT = typename T::value_type;
+    realT real1 = std::real(x1);
+    realT real2 = std::real(x2);
+    realT imag1 = std::imag(x1);
+    realT imag2 = std::imag(x2);
+
+    bool isnan_imag1 = std::isnan(imag1);
+    bool lt = (real1 == real2)
+                  ? (imag1 < imag2)
+                  : (real1 < real2 && !isnan_imag1 && !std::isnan(imag2));
+    return (std::isnan(real1) || isnan_imag1 || lt) ? x1 : x2;
+}
+
+} // namespace math_utils
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/source/device_support_queries.cpp b/dpctl/tensor/libtensor/source/device_support_queries.cpp
index 16ae43ba97..946c36ad26 100644
--- a/dpctl/tensor/libtensor/source/device_support_queries.cpp
+++ b/dpctl/tensor/libtensor/source/device_support_queries.cpp
@@ -71,6 +71,11 @@ std::string _default_device_bool_type(sycl::device)
     return "b1";
 }
 
+std::string _default_device_index_type(sycl::device)
+{
+    return "i8";
+}
+
 sycl::device _extract_device(py::object arg)
 {
     auto const &api = dpctl::detail::dpctl_capi::get();
@@ -115,6 +120,12 @@ std::string default_device_complex_type(py::object arg)
     return _default_device_complex_type(d);
 }
 
+std::string default_device_index_type(py::object arg)
+{
+    sycl::device d = _extract_device(arg);
+    return _default_device_index_type(d);
+}
+
 } // namespace py_internal
 } // namespace tensor
 } // namespace dpctl
diff --git a/dpctl/tensor/libtensor/source/device_support_queries.hpp b/dpctl/tensor/libtensor/source/device_support_queries.hpp
index a54835fc75..6626b3502a 100644
--- a/dpctl/tensor/libtensor/source/device_support_queries.hpp
+++ b/dpctl/tensor/libtensor/source/device_support_queries.hpp
@@ -41,6 +41,7 @@ extern std::string default_device_fp_type(py::object);
 extern std::string default_device_int_type(py::object);
 extern std::string default_device_bool_type(py::object);
 extern std::string default_device_complex_type(py::object);
+extern std::string default_device_index_type(py::object);
 
 } // namespace py_internal
 } // namespace tensor
diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp
index 22eb85eea9..cc95cecb38 100644
--- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp
+++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp
@@ -74,6 +74,8 @@
 #include "kernels/elementwise_functions/logical_not.hpp"
 #include "kernels/elementwise_functions/logical_or.hpp"
 #include "kernels/elementwise_functions/logical_xor.hpp"
+#include "kernels/elementwise_functions/maximum.hpp"
+#include "kernels/elementwise_functions/minimum.hpp"
 #include "kernels/elementwise_functions/multiply.hpp"
 #include "kernels/elementwise_functions/negative.hpp"
 #include "kernels/elementwise_functions/not_equal.hpp"
@@ -1794,6 +1796,86 @@ void populate_logical_xor_dispatch_tables(void)
 };
 } // namespace impl
 
+// B??: ==== MAXIMUM    (x1, x2)
+namespace impl
+{
+
+namespace maximum_fn_ns = dpctl::tensor::kernels::maximum;
+
+static binary_contig_impl_fn_ptr_t
+    maximum_contig_dispatch_table[td_ns::num_types][td_ns::num_types];
+static int maximum_output_id_table[td_ns::num_types][td_ns::num_types];
+
+static binary_strided_impl_fn_ptr_t
+    maximum_strided_dispatch_table[td_ns::num_types][td_ns::num_types];
+
+void populate_maximum_dispatch_tables(void)
+{
+    using namespace td_ns;
+    namespace fn_ns = maximum_fn_ns;
+
+    // which input types are supported, and what is the type of the result
+    using fn_ns::MaximumTypeMapFactory;
+    DispatchTableBuilder<int, MaximumTypeMapFactory, num_types> dtb1;
+    dtb1.populate_dispatch_table(maximum_output_id_table);
+
+    // function pointers for operation on general strided arrays
+    using fn_ns::MaximumStridedFactory;
+    DispatchTableBuilder<binary_strided_impl_fn_ptr_t, MaximumStridedFactory,
+                         num_types>
+        dtb2;
+    dtb2.populate_dispatch_table(maximum_strided_dispatch_table);
+
+    // function pointers for operation on contiguous inputs and output
+    using fn_ns::MaximumContigFactory;
+    DispatchTableBuilder<binary_contig_impl_fn_ptr_t, MaximumContigFactory,
+                         num_types>
+        dtb3;
+    dtb3.populate_dispatch_table(maximum_contig_dispatch_table);
+};
+
+} // namespace impl
+
+// B??: ==== MINIMUM    (x1, x2)
+namespace impl
+{
+
+namespace minimum_fn_ns = dpctl::tensor::kernels::minimum;
+
+static binary_contig_impl_fn_ptr_t
+    minimum_contig_dispatch_table[td_ns::num_types][td_ns::num_types];
+static int minimum_output_id_table[td_ns::num_types][td_ns::num_types];
+
+static binary_strided_impl_fn_ptr_t
+    minimum_strided_dispatch_table[td_ns::num_types][td_ns::num_types];
+
+void populate_minimum_dispatch_tables(void)
+{
+    using namespace td_ns;
+    namespace fn_ns = minimum_fn_ns;
+
+    // which input types are supported, and what is the type of the result
+    using fn_ns::MinimumTypeMapFactory;
+    DispatchTableBuilder<int, MinimumTypeMapFactory, num_types> dtb1;
+    dtb1.populate_dispatch_table(minimum_output_id_table);
+
+    // function pointers for operation on general strided arrays
+    using fn_ns::MinimumStridedFactory;
+    DispatchTableBuilder<binary_strided_impl_fn_ptr_t, MinimumStridedFactory,
+                         num_types>
+        dtb2;
+    dtb2.populate_dispatch_table(minimum_strided_dispatch_table);
+
+    // function pointers for operation on contiguous inputs and output
+    using fn_ns::MinimumContigFactory;
+    DispatchTableBuilder<binary_contig_impl_fn_ptr_t, MinimumContigFactory,
+                         num_types>
+        dtb3;
+    dtb3.populate_dispatch_table(minimum_contig_dispatch_table);
+};
+
+} // namespace impl
+
 // B19: ==== MULTIPLY    (x1, x2)
 namespace impl
 {
@@ -3965,6 +4047,86 @@ void init_elementwise_functions(py::module_ m)
         m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, "");
     }
 
+    // B??: ==== MAXIMUM    (x1, x2)
+    {
+        impl::populate_maximum_dispatch_tables();
+        using impl::maximum_contig_dispatch_table;
+        using impl::maximum_output_id_table;
+        using impl::maximum_strided_dispatch_table;
+
+        auto maximum_pyapi = [&](dpctl::tensor::usm_ndarray src1,
+                                 dpctl::tensor::usm_ndarray src2,
+                                 dpctl::tensor::usm_ndarray dst,
+                                 sycl::queue exec_q,
+                                 const std::vector<sycl::event> &depends = {}) {
+            return py_binary_ufunc(
+                src1, src2, dst, exec_q, depends, maximum_output_id_table,
+                // function pointers to handle operation on contiguous
+                // arrays (pointers may be nullptr)
+                maximum_contig_dispatch_table,
+                // function pointers to handle operation on strided arrays
+                // (most general case)
+                maximum_strided_dispatch_table,
+                // function pointers to handle operation of c-contig matrix
+                // and c-contig row with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{},
+                // function pointers to handle operation of c-contig matrix
+                // and c-contig row with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{});
+        };
+        auto maximum_result_type_pyapi = [&](py::dtype dtype1,
+                                             py::dtype dtype2) {
+            return py_binary_ufunc_result_type(dtype1, dtype2,
+                                               maximum_output_id_table);
+        };
+        m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"),
+              py::arg("dst"), py::arg("sycl_queue"),
+              py::arg("depends") = py::list());
+        m.def("_maximum_result_type", maximum_result_type_pyapi, "");
+    }
+
+    // B??: ==== MINIMUM    (x1, x2)
+    {
+        impl::populate_minimum_dispatch_tables();
+        using impl::minimum_contig_dispatch_table;
+        using impl::minimum_output_id_table;
+        using impl::minimum_strided_dispatch_table;
+
+        auto minimum_pyapi = [&](dpctl::tensor::usm_ndarray src1,
+                                 dpctl::tensor::usm_ndarray src2,
+                                 dpctl::tensor::usm_ndarray dst,
+                                 sycl::queue exec_q,
+                                 const std::vector<sycl::event> &depends = {}) {
+            return py_binary_ufunc(
+                src1, src2, dst, exec_q, depends, minimum_output_id_table,
+                // function pointers to handle operation on contiguous
+                // arrays (pointers may be nullptr)
+                minimum_contig_dispatch_table,
+                // function pointers to handle operation on strided arrays
+                // (most general case)
+                minimum_strided_dispatch_table,
+                // function pointers to handle operation of c-contig matrix
+                // and c-contig row with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{},
+                // function pointers to handle operation of c-contig matrix and
+                // c-contig row with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{});
+        };
+        auto minimum_result_type_pyapi = [&](py::dtype dtype1,
+                                             py::dtype dtype2) {
+            return py_binary_ufunc_result_type(dtype1, dtype2,
+                                               minimum_output_id_table);
+        };
+        m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"),
+              py::arg("dst"), py::arg("sycl_queue"),
+              py::arg("depends") = py::list());
+        m.def("_minimum_result_type", minimum_result_type_pyapi, "");
+    }
+
     // B19: ==== MULTIPLY    (x1, x2)
     {
         impl::populate_multiply_dispatch_tables();
diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp
index 1833c2d770..9b4ba6cdad 100644
--- a/dpctl/tensor/libtensor/source/tensor_py.cpp
+++ b/dpctl/tensor/libtensor/source/tensor_py.cpp
@@ -297,9 +297,13 @@ PYBIND11_MODULE(_tensor_impl, m)
 
     m.def("default_device_complex_type",
           dpctl::tensor::py_internal::default_device_complex_type,
-          "Gives default complex floating point type support by device.",
+          "Gives default complex floating point type supported by device.",
           py::arg("dev"));
 
+    m.def("default_device_index_type",
+          dpctl::tensor::py_internal::default_device_index_type,
+          "Gives default index type supported by device.", py::arg("dev"));
+
     auto tril_fn = [](dpctl::tensor::usm_ndarray src,
                       dpctl::tensor::usm_ndarray dst, py::ssize_t k,
                       sycl::queue exec_q,
diff --git a/dpctl/tests/elementwise/test_complex.py b/dpctl/tests/elementwise/test_complex.py
index 684b405612..85d3c0ad2e 100644
--- a/dpctl/tests/elementwise/test_complex.py
+++ b/dpctl/tests/elementwise/test_complex.py
@@ -15,6 +15,7 @@
 #  limitations under the License.
 
 import itertools
+import warnings
 
 import numpy as np
 import pytest
@@ -203,12 +204,18 @@ def test_complex_special_cases(dtype):
     Xc = dpt.asarray(Xc_np, dtype=dtype, sycl_queue=q)
 
     tol = 8 * dpt.finfo(dtype).resolution
-    assert_allclose(
-        dpt.asnumpy(dpt.real(Xc)), np.real(Xc_np), atol=tol, rtol=tol
-    )
-    assert_allclose(
-        dpt.asnumpy(dpt.imag(Xc)), np.imag(Xc_np), atol=tol, rtol=tol
-    )
-    assert_allclose(
-        dpt.asnumpy(dpt.conj(Xc)), np.conj(Xc_np), atol=tol, rtol=tol
-    )
+
+    actual = dpt.real(Xc)
+    expected = np.real(Xc_np)
+    assert_allclose(dpt.asnumpy(actual), expected, atol=tol, rtol=tol)
+
+    actual = dpt.imag(Xc)
+    expected = np.imag(Xc_np)
+    assert_allclose(dpt.asnumpy(actual), expected, atol=tol, rtol=tol)
+
+    actual = dpt.conj(Xc)
+    expected = np.conj(Xc_np)
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        assert_allclose(dpt.asnumpy(actual), expected, atol=tol, rtol=tol)
diff --git a/dpctl/tests/elementwise/test_expm1.py b/dpctl/tests/elementwise/test_expm1.py
index 20dc421c77..ba95d2b96d 100644
--- a/dpctl/tests/elementwise/test_expm1.py
+++ b/dpctl/tests/elementwise/test_expm1.py
@@ -116,29 +116,53 @@ def test_expm1_order(dtype):
 
 
 def test_expm1_special_cases():
-    q = get_queue_or_skip()
+    get_queue_or_skip()
 
-    X = dpt.asarray(
-        [dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q
-    )
-    Xnp = dpt.asnumpy(X)
+    X = dpt.asarray([dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4")
+    res = np.asarray([np.nan, 0.0, -0.0, np.inf, -1.0], dtype="f4")
 
     tol = dpt.finfo(X.dtype).resolution
-    assert_allclose(
-        dpt.asnumpy(dpt.expm1(X)), np.expm1(Xnp), atol=tol, rtol=tol
-    )
+    assert_allclose(dpt.asnumpy(dpt.expm1(X)), res, atol=tol, rtol=tol)
 
     # special cases for complex variant
+    num_finite = 1.0
     vals = [
-        complex(*val)
-        for val in itertools.permutations(
-            [dpt.nan, dpt.inf, -dpt.inf, 0.0, -0.0, 1.0], 2
-        )
+        complex(0.0, 0.0),
+        complex(num_finite, dpt.inf),
+        complex(num_finite, dpt.nan),
+        complex(dpt.inf, 0.0),
+        complex(-dpt.inf, num_finite),
+        complex(dpt.inf, num_finite),
+        complex(-dpt.inf, dpt.inf),
+        complex(dpt.inf, dpt.inf),
+        complex(-dpt.inf, dpt.nan),
+        complex(dpt.inf, dpt.nan),
+        complex(dpt.nan, 0.0),
+        complex(dpt.nan, num_finite),
+        complex(dpt.nan, dpt.nan),
     ]
     X = dpt.asarray(vals, dtype=dpt.complex64)
-    Xnp = dpt.asnumpy(X)
+    cis_1 = complex(np.cos(num_finite), np.sin(num_finite))
+    c_nan = complex(np.nan, np.nan)
+    res = np.asarray(
+        [
+            complex(0.0, 0.0),
+            c_nan,
+            c_nan,
+            complex(np.inf, 0.0),
+            0.0 * cis_1 - 1.0,
+            np.inf * cis_1 - 1.0,
+            complex(-1.0, 0.0),
+            complex(np.inf, np.nan),
+            complex(-1.0, 0.0),
+            complex(np.inf, np.nan),
+            complex(np.nan, 0.0),
+            c_nan,
+            c_nan,
+        ],
+        dtype=np.complex64,
+    )
 
     tol = dpt.finfo(X.dtype).resolution
-    assert_allclose(
-        dpt.asnumpy(dpt.expm1(X)), np.expm1(Xnp), atol=tol, rtol=tol
-    )
+    with np.errstate(invalid="ignore"):
+        assert_allclose(dpt.asnumpy(dpt.expm1(X)), res, atol=tol, rtol=tol)
diff --git a/dpctl/tests/elementwise/test_greater.py b/dpctl/tests/elementwise/test_greater.py
index fbda074e53..97915411ea 100644
--- a/dpctl/tests/elementwise/test_greater.py
+++ b/dpctl/tests/elementwise/test_greater.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Ungreater required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_greater_equal.py b/dpctl/tests/elementwise/test_greater_equal.py
index 3f56e5d460..d188a852ea 100644
--- a/dpctl/tests/elementwise/test_greater_equal.py
+++ b/dpctl/tests/elementwise/test_greater_equal.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Ungreater_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_less_equal.py b/dpctl/tests/elementwise/test_less_equal.py
index b539d6a48f..1949b9d91f 100644
--- a/dpctl/tests/elementwise/test_less_equal.py
+++ b/dpctl/tests/elementwise/test_less_equal.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_log1p.py b/dpctl/tests/elementwise/test_log1p.py
index 2820b4f8b5..d40574c415 100644
--- a/dpctl/tests/elementwise/test_log1p.py
+++ b/dpctl/tests/elementwise/test_log1p.py
@@ -119,28 +119,49 @@ def test_log1p_special_cases():
     q = get_queue_or_skip()
 
     X = dpt.asarray(
-        [dpt.nan, -1.0, -2.0, 0.0, -0.0, dpt.inf, -dpt.inf],
+        [dpt.nan, -2.0, -1.0, -0.0, 0.0, dpt.inf],
         dtype="f4",
         sycl_queue=q,
     )
-    Xnp = dpt.asnumpy(X)
+    res = np.asarray([np.nan, np.nan, -np.inf, -0.0, 0.0, np.inf])
 
     tol = dpt.finfo(X.dtype).resolution
-    assert_allclose(
-        dpt.asnumpy(dpt.log1p(X)), np.log1p(Xnp), atol=tol, rtol=tol
-    )
+    with np.errstate(divide="ignore", invalid="ignore"):
+        assert_allclose(dpt.asnumpy(dpt.log1p(X)), res, atol=tol, rtol=tol)
 
     # special cases for complex
     vals = [
-        complex(*val)
-        for val in itertools.permutations(
-            [dpt.nan, dpt.inf, -dpt.inf, 0.0, -0.0, 1.0, -1.0, -2.0], 2
-        )
+        complex(-1.0, 0.0),
+        complex(2.0, dpt.inf),
+        complex(2.0, dpt.nan),
+        complex(-dpt.inf, 1.0),
+        complex(dpt.inf, 1.0),
+        complex(-dpt.inf, dpt.inf),
+        complex(dpt.inf, dpt.inf),
+        complex(dpt.inf, dpt.nan),
+        complex(dpt.nan, 1.0),
+        complex(dpt.nan, dpt.inf),
+        complex(dpt.nan, dpt.nan),
     ]
     X = dpt.asarray(vals, dtype=dpt.complex64)
-    Xnp = dpt.asnumpy(X)
+    c_nan = complex(np.nan, np.nan)
+    res = np.asarray(
+        [
+            complex(-np.inf, 0.0),
+            complex(np.inf, np.pi / 2),
+            c_nan,
+            complex(np.inf, np.pi),
+            complex(np.inf, 0.0),
+            complex(np.inf, 3 * np.pi / 4),
+            complex(np.inf, np.pi / 4),
+            complex(np.inf, np.nan),
+            c_nan,
+            complex(np.inf, np.nan),
+            c_nan,
+        ],
+        dtype=np.complex64,
+    )
 
     tol = dpt.finfo(X.dtype).resolution
-    assert_allclose(
-        dpt.asnumpy(dpt.log1p(X)), np.log1p(Xnp), atol=tol, rtol=tol
-    )
+    with np.errstate(invalid="ignore"):
+        assert_allclose(dpt.asnumpy(dpt.log1p(X)), res, atol=tol, rtol=tol)
diff --git a/dpctl/tests/elementwise/test_logical_and.py b/dpctl/tests/elementwise/test_logical_and.py
index 12a35b06d6..c897bde876 100644
--- a/dpctl/tests/elementwise/test_logical_and.py
+++ b/dpctl/tests/elementwise/test_logical_and.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_logical_not.py b/dpctl/tests/elementwise/test_logical_not.py
index aec9bf31b4..80ebaebb89 100644
--- a/dpctl/tests/elementwise/test_logical_not.py
+++ b/dpctl/tests/elementwise/test_logical_not.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_logical_or.py b/dpctl/tests/elementwise/test_logical_or.py
index f99f6758f5..9bc46cb042 100644
--- a/dpctl/tests/elementwise/test_logical_or.py
+++ b/dpctl/tests/elementwise/test_logical_or.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_logical_xor.py b/dpctl/tests/elementwise/test_logical_xor.py
index 9c34e8bbb5..aef9bc2f42 100644
--- a/dpctl/tests/elementwise/test_logical_xor.py
+++ b/dpctl/tests/elementwise/test_logical_xor.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_maximum_minimum.py b/dpctl/tests/elementwise/test_maximum_minimum.py
new file mode 100644
index 0000000000..e8b845d20d
--- /dev/null
+++ b/dpctl/tests/elementwise/test_maximum_minimum.py
@@ -0,0 +1,314 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import ctypes
+import itertools
+
+import numpy as np
+import pytest
+from numpy.testing import assert_array_equal
+
+import dpctl
+import dpctl.tensor as dpt
+from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported
+
+from .utils import _all_dtypes, _compare_dtypes, _usm_types
+
+
+@pytest.mark.parametrize("op1_dtype", _all_dtypes)
+@pytest.mark.parametrize("op2_dtype", _all_dtypes)
+def test_maximum_minimum_dtype_matrix(op1_dtype, op2_dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(op1_dtype, q)
+    skip_if_dtype_not_supported(op2_dtype, q)
+
+    sz = 127
+    ar1_np = np.arange(sz)
+    np.random.shuffle(ar1_np)
+    ar1 = dpt.asarray(ar1_np, dtype=op1_dtype)
+    ar2_np = np.arange(sz)
+    np.random.shuffle(ar2_np)
+    ar2 = dpt.asarray(ar2_np, dtype=op2_dtype)
+
+    r = dpt.maximum(ar1, ar2)
+    assert isinstance(r, dpt.usm_ndarray)
+    expected = np.maximum(ar1_np.astype(op1_dtype), ar2_np.astype(op2_dtype))
+
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == ar1.shape
+    assert (dpt.asnumpy(r) == expected).all()
+    assert r.sycl_queue == ar1.sycl_queue
+
+    r = dpt.minimum(ar1, ar2)
+    assert isinstance(r, dpt.usm_ndarray)
+    expected = np.minimum(ar1_np.astype(op1_dtype), ar2_np.astype(op2_dtype))
+
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == ar1.shape
+    assert (dpt.asnumpy(r) == expected).all()
+    assert r.sycl_queue == ar1.sycl_queue
+
+    ar3_np = np.arange(sz)
+    np.random.shuffle(ar3_np)
+    ar3 = dpt.asarray(ar3_np, dtype=op1_dtype)
+    ar4_np = np.arange(2 * sz)
+    np.random.shuffle(ar4_np)
+    ar4 = dpt.asarray(ar4_np, dtype=op2_dtype)
+
+    r = dpt.maximum(ar3[::-1], ar4[::2])
+    assert isinstance(r, dpt.usm_ndarray)
+    expected = np.maximum(
+        ar3_np[::-1].astype(op1_dtype), ar4_np[::2].astype(op2_dtype)
+    )
+
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == ar3.shape
+    assert (dpt.asnumpy(r) == expected).all()
+
+    r = dpt.minimum(ar3[::-1], ar4[::2])
+    assert isinstance(r, dpt.usm_ndarray)
+    expected = np.minimum(
+        ar3_np[::-1].astype(op1_dtype), ar4_np[::2].astype(op2_dtype)
+    )
+
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == ar3.shape
+    assert (dpt.asnumpy(r) == expected).all()
+
+
+@pytest.mark.parametrize("op_dtype", ["c8", "c16"])
+def test_maximum_minimum_complex_matrix(op_dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(op_dtype, q)
+
+    sz = 127
+    ar1_np_real = np.random.randint(0, 10, sz)
+    ar1_np_imag = np.random.randint(0, 10, sz)
+    ar1 = dpt.asarray(ar1_np_real + 1j * ar1_np_imag, dtype=op_dtype)
+
+    ar2_np_real = np.random.randint(0, 10, sz)
+    ar2_np_imag = np.random.randint(0, 10, sz)
+    ar2 = dpt.asarray(ar2_np_real + 1j * ar2_np_imag, dtype=op_dtype)
+
+    r = dpt.maximum(ar1, ar2)
+    expected = np.maximum(dpt.asnumpy(ar1), dpt.asnumpy(ar2))
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == expected.shape
+    assert_array_equal(dpt.asnumpy(r), expected)
+
+    r1 = dpt.maximum(ar1[::-2], ar2[::2])
+    expected1 = np.maximum(dpt.asnumpy(ar1[::-2]), dpt.asnumpy(ar2[::2]))
+    assert _compare_dtypes(r.dtype, expected1.dtype, sycl_queue=q)
+    assert r1.shape == expected1.shape
+    assert_array_equal(dpt.asnumpy(r1), expected1)
+
+    r = dpt.minimum(ar1, ar2)
+    expected = np.minimum(dpt.asnumpy(ar1), dpt.asnumpy(ar2))
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == expected.shape
+    assert_array_equal(dpt.asnumpy(r), expected)
+
+    r1 = dpt.minimum(ar1[::-2], ar2[::2])
+    expected1 = np.minimum(dpt.asnumpy(ar1[::-2]), dpt.asnumpy(ar2[::2]))
+    assert _compare_dtypes(r.dtype, expected1.dtype, sycl_queue=q)
+    assert r1.shape == expected1.shape
+    assert_array_equal(dpt.asnumpy(r1), expected1)
+
+
+@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"])
+def test_maximum_minimum_real_special_cases(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    x = [np.nan, np.inf, -np.inf, 5.0, -3.0]
+    x = list(itertools.product(x, repeat=2))
+    Xnp = np.array([tup[0] for tup in x], dtype=dtype)
+    Ynp = np.array([tup[1] for tup in x], dtype=dtype)
+    X = dpt.asarray(Xnp, dtype=dtype)
+    Y = dpt.asarray(Ynp, dtype=dtype)
+
+    R = dpt.maximum(X, Y)
+    Rnp = np.maximum(Xnp, Ynp)
+    assert_array_equal(dpt.asnumpy(R), Rnp)
+
+    R = dpt.minimum(X, Y)
+    Rnp = np.minimum(Xnp, Ynp)
+    assert_array_equal(dpt.asnumpy(R), Rnp)
+
+
+@pytest.mark.parametrize("dtype", ["c8", "c16"])
+def test_maximum_minimum_complex_special_cases(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    x = [np.nan, -np.inf, -np.inf, +2.0, -1.0]
+    x = [complex(*val) for val in itertools.product(x, repeat=2)]
+    x = list(itertools.product(x, repeat=2))
+
+    Xnp = np.array([tup[0] for tup in x], dtype=dtype)
+    Ynp = np.array([tup[1] for tup in x], dtype=dtype)
+    X = dpt.asarray(Xnp, dtype=dtype, sycl_queue=q)
+    Y = dpt.asarray(Ynp, dtype=dtype, sycl_queue=q)
+
+    R = dpt.maximum(X, Y)
+    Rnp = np.maximum(Xnp, Ynp)
+    assert_array_equal(dpt.asnumpy(dpt.real(R)), np.real(Rnp))
+    assert_array_equal(dpt.asnumpy(dpt.imag(R)), np.imag(Rnp))
+
+    R = dpt.minimum(X, Y)
+    Rnp = np.minimum(Xnp, Ynp)
+    assert_array_equal(dpt.asnumpy(dpt.real(R)), np.real(Rnp))
+    assert_array_equal(dpt.asnumpy(dpt.imag(R)), np.imag(Rnp))
+
+
+@pytest.mark.parametrize("op1_usm_type", _usm_types)
+@pytest.mark.parametrize("op2_usm_type", _usm_types)
+def test_maximum_minimum_usm_type_matrix(op1_usm_type, op2_usm_type):
+    get_queue_or_skip()
+
+    sz = 128
+    ar1_np = np.arange(sz, dtype="i4")
+    np.random.shuffle(ar1_np)
+    ar1 = dpt.asarray(ar1_np, usm_type=op1_usm_type)
+    ar2_np = np.arange(sz, dtype="i4")
+    np.random.shuffle(ar2_np)
+    ar2 = dpt.asarray(ar2_np, usm_type=op2_usm_type)
+
+    r = dpt.maximum(ar1, ar2)
+    assert isinstance(r, dpt.usm_ndarray)
+    expected_usm_type = dpctl.utils.get_coerced_usm_type(
+        (op1_usm_type, op2_usm_type)
+    )
+    assert r.usm_type == expected_usm_type
+
+    r = dpt.minimum(ar1, ar2)
+    assert isinstance(r, dpt.usm_ndarray)
+    expected_usm_type = dpctl.utils.get_coerced_usm_type(
+        (op1_usm_type, op2_usm_type)
+    )
+    assert r.usm_type == expected_usm_type
+
+
+def test_maximum_minimum_order():
+    get_queue_or_skip()
+
+    ar1_np = np.arange(20 * 20, dtype="i4").reshape(20, 20)
+    np.random.shuffle(ar1_np)
+    ar1 = dpt.asarray(ar1_np, order="C")
+    ar2_np = np.arange(20 * 20, dtype="i4").reshape(20, 20)
+    np.random.shuffle(ar2_np)
+    ar2 = dpt.asarray(ar2_np, order="C")
+
+    r1 = dpt.maximum(ar1, ar2, order="C")
+    assert r1.flags.c_contiguous
+    r2 = dpt.maximum(ar1, ar2, order="F")
+    assert r2.flags.f_contiguous
+    r3 = dpt.maximum(ar1, ar2, order="A")
+    assert r3.flags.c_contiguous
+    r4 = dpt.maximum(ar1, ar2, order="K")
+    assert r4.flags.c_contiguous
+
+    ar1 = dpt.asarray(ar1_np, order="F")
+    ar2 = dpt.asarray(ar2_np, order="F")
+    r1 = dpt.maximum(ar1, ar2, order="C")
+    assert r1.flags.c_contiguous
+    r2 = dpt.maximum(ar1, ar2, order="F")
+    assert r2.flags.f_contiguous
+    r3 = dpt.maximum(ar1, ar2, order="A")
+    assert r3.flags.f_contiguous
+    r4 = dpt.maximum(ar1, ar2, order="K")
+    assert r4.flags.f_contiguous
+
+    ar1_np = np.arange(40 * 40, dtype="i4").reshape(40, 40)
+    np.random.shuffle(ar1_np)
+    ar1 = dpt.asarray(ar1_np, order="C")[:20, ::-2]
+    ar2_np = np.arange(40 * 40, dtype="i4").reshape(40, 40)
+    np.random.shuffle(ar2_np)
+    ar2 = dpt.asarray(ar2_np, order="C")[:20, ::-2]
+    r4 = dpt.maximum(ar1, ar2, order="K")
+    assert r4.strides == (20, -1)
+
+    ar1 = dpt.asarray(ar1_np, order="C")[:20, ::-2].mT
+    ar2 = dpt.asarray(ar2_np, order="C")[:20, ::-2].mT
+    r4 = dpt.maximum(ar1, ar2, order="K")
+    assert r4.strides == (-1, 20)
+
+
+@pytest.mark.parametrize("arr_dt", _all_dtypes)
+def test_maximum_minimum_python_scalar(arr_dt):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(arr_dt, q)
+
+    X = dpt.zeros((10, 10), dtype=arr_dt, sycl_queue=q)
+    py_ones = (
+        bool(1),
+        int(1),
+        float(1),
+        complex(1),
+        np.float32(1),
+        ctypes.c_int(1),
+    )
+    for sc in py_ones:
+        R = dpt.maximum(X, sc)
+        assert isinstance(R, dpt.usm_ndarray)
+        R = dpt.maximum(sc, X)
+        assert isinstance(R, dpt.usm_ndarray)
+
+        R = dpt.minimum(X, sc)
+        assert isinstance(R, dpt.usm_ndarray)
+        R = dpt.minimum(sc, X)
+        assert isinstance(R, dpt.usm_ndarray)
+
+
+class MockArray:
+    def __init__(self, arr):
+        self.data_ = arr
+
+    @property
+    def __sycl_usm_array_interface__(self):
+        return self.data_.__sycl_usm_array_interface__
+
+
+def test_maximum_minimum_mock_array():
+    get_queue_or_skip()
+    a = dpt.arange(10)
+    b = dpt.ones(10)
+    c = MockArray(b)
+    r = dpt.maximum(a, c)
+    assert isinstance(r, dpt.usm_ndarray)
+
+    r = dpt.minimum(a, c)
+    assert isinstance(r, dpt.usm_ndarray)
+
+
+def test_maximum_canary_mock_array():
+    get_queue_or_skip()
+    a = dpt.arange(10)
+
+    class Canary:
+        def __init__(self):
+            pass
+
+        @property
+        def __sycl_usm_array_interface__(self):
+            return None
+
+    c = Canary()
+    with pytest.raises(ValueError):
+        dpt.maximum(a, c)
+
+    with pytest.raises(ValueError):
+        dpt.minimum(a, c)
diff --git a/dpctl/tests/elementwise/test_round.py b/dpctl/tests/elementwise/test_round.py
index fb2b104bb1..6ca4feaf22 100644
--- a/dpctl/tests/elementwise/test_round.py
+++ b/dpctl/tests/elementwise/test_round.py
@@ -1,3 +1,19 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
 import itertools
 
 import numpy as np
diff --git a/dpctl/tests/elementwise/test_type_utils.py b/dpctl/tests/elementwise/test_type_utils.py
index 403e455c2e..f6e09b9b4e 100644
--- a/dpctl/tests/elementwise/test_type_utils.py
+++ b/dpctl/tests/elementwise/test_type_utils.py
@@ -18,6 +18,7 @@
 
 import dpctl
 import dpctl.tensor as dpt
+import dpctl.tensor._copy_utils as cu
 import dpctl.tensor._type_utils as tu
 
 from .utils import _all_dtypes, _map_to_device_dtype
@@ -73,15 +74,15 @@ def test_type_utils_empty_like_orderK():
         a = dpt.empty((10, 10), dtype=dpt.int32, order="F")
     except dpctl.SyclDeviceCreationError:
         pytest.skip("No SYCL devices available")
-    X = tu._empty_like_orderK(a, dpt.int32, a.usm_type, a.device)
+    X = cu._empty_like_orderK(a, dpt.int32, a.usm_type, a.device)
     assert X.flags["F"]
 
 
 def test_type_utils_empty_like_orderK_invalid_args():
     with pytest.raises(TypeError):
-        tu._empty_like_orderK([1, 2, 3], dpt.int32, "device", None)
+        cu._empty_like_orderK([1, 2, 3], dpt.int32, "device", None)
     with pytest.raises(TypeError):
-        tu._empty_like_pair_orderK(
+        cu._empty_like_pair_orderK(
             [1, 2, 3],
             (
                 1,
@@ -98,7 +99,7 @@ def test_type_utils_empty_like_orderK_invalid_args():
     except dpctl.SyclDeviceCreationError:
         pytest.skip("No SYCL devices available")
     with pytest.raises(TypeError):
-        tu._empty_like_pair_orderK(
+        cu._empty_like_pair_orderK(
             a,
             (
                 1,
@@ -185,3 +186,47 @@ def test_binary_func_arg_validation():
     with pytest.raises(ValueError):
         dpt.add(a, Ellipsis)
     dpt.add(a, a, order="invalid")
+
+
+def test_all_data_types():
+    fp16_fp64_types = set([dpt.float16, dpt.float64, dpt.complex128])
+    fp64_types = set([dpt.float64, dpt.complex128])
+
+    all_dts = tu._all_data_types(True, True)
+    assert fp16_fp64_types.issubset(all_dts)
+
+    all_dts = tu._all_data_types(True, False)
+    assert dpt.float16 in all_dts
+    assert not fp64_types.issubset(all_dts)
+
+    all_dts = tu._all_data_types(False, True)
+    assert dpt.float16 not in all_dts
+    assert fp64_types.issubset(all_dts)
+
+    all_dts = tu._all_data_types(False, False)
+    assert not fp16_fp64_types.issubset(all_dts)
+
+
+@pytest.mark.parametrize("fp16", [True, False])
+@pytest.mark.parametrize("fp64", [True, False])
+def test_maximal_inexact_types(fp16, fp64):
+    assert not tu._is_maximal_inexact_type(dpt.int32, fp16, fp64)
+    assert fp64 == tu._is_maximal_inexact_type(dpt.float64, fp16, fp64)
+    assert fp64 == tu._is_maximal_inexact_type(dpt.complex128, fp16, fp64)
+    assert fp64 != tu._is_maximal_inexact_type(dpt.float32, fp16, fp64)
+    assert fp64 != tu._is_maximal_inexact_type(dpt.complex64, fp16, fp64)
+
+
+def test_can_cast_device():
+    assert tu._can_cast(dpt.int64, dpt.float64, True, True)
+    # if f8 is available, can't cast i8 to f4
+    assert not tu._can_cast(dpt.int64, dpt.float32, True, True)
+    assert not tu._can_cast(dpt.int64, dpt.float32, False, True)
+    # should be able to cast to f8 when f2 unavailable
+    assert tu._can_cast(dpt.int64, dpt.float64, False, True)
+    # casting to f4 acceptable when f8 unavailable
+    assert tu._can_cast(dpt.int64, dpt.float32, True, False)
+    assert tu._can_cast(dpt.int64, dpt.float32, False, False)
+    # can't safely cast inexact type to inexact type of lesser precision
+    assert not tu._can_cast(dpt.float32, dpt.float16, True, False)
+    assert not tu._can_cast(dpt.float64, dpt.float32, False, True)
diff --git a/dpctl/tests/test_sycl_platform.py b/dpctl/tests/test_sycl_platform.py
index 39331f9cdc..fa93dbbd12 100644
--- a/dpctl/tests/test_sycl_platform.py
+++ b/dpctl/tests/test_sycl_platform.py
@@ -17,6 +17,8 @@
 """Defines unit test cases for the SyclPlatform class.
 """
 
+import sys
+
 import pytest
 from helper import has_sycl_platforms
 
@@ -88,10 +90,27 @@ def check_repr(platform):
 
 
 def check_default_context(platform):
+    if "linux" not in sys.platform:
+        return
     r = platform.default_context
     assert type(r) is dpctl.SyclContext
 
 
+def check_equal_and_hash(platform):
+    assert platform == platform
+    if "linux" not in sys.platform:
+        return
+    default_ctx = platform.default_context
+    for d in default_ctx.get_devices():
+        assert platform == d.sycl_platform
+        assert hash(platform) == hash(d.sycl_platform)
+
+
+def check_hash_in_dict(platform):
+    map = {platform: 0}
+    assert map[platform] == 0
+
+
 list_of_checks = [
     check_name,
     check_vendor,
@@ -99,6 +118,9 @@ def check_default_context(platform):
     check_backend,
     check_print_info,
     check_repr,
+    check_default_context,
+    check_equal_and_hash,
+    check_hash_in_dict,
 ]
 
 
diff --git a/dpctl/tests/test_type_utils.py b/dpctl/tests/test_type_utils.py
deleted file mode 100644
index 882478a2ce..0000000000
--- a/dpctl/tests/test_type_utils.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#                      Data Parallel Control (dpctl)
-#
-# Copyright 2020-2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-import dpctl.tensor as dpt
-from dpctl.tensor._type_utils import (
-    _all_data_types,
-    _can_cast,
-    _is_maximal_inexact_type,
-)
-
-
-def test_all_data_types():
-    fp16_fp64_types = set([dpt.float16, dpt.float64, dpt.complex128])
-    fp64_types = set([dpt.float64, dpt.complex128])
-
-    all_dts = _all_data_types(True, True)
-    assert fp16_fp64_types.issubset(all_dts)
-
-    all_dts = _all_data_types(True, False)
-    assert dpt.float16 in all_dts
-    assert not fp64_types.issubset(all_dts)
-
-    all_dts = _all_data_types(False, True)
-    assert dpt.float16 not in all_dts
-    assert fp64_types.issubset(all_dts)
-
-    all_dts = _all_data_types(False, False)
-    assert not fp16_fp64_types.issubset(all_dts)
-
-
-@pytest.mark.parametrize("fp16", [True, False])
-@pytest.mark.parametrize("fp64", [True, False])
-def test_maximal_inexact_types(fp16, fp64):
-    assert not _is_maximal_inexact_type(dpt.int32, fp16, fp64)
-    assert fp64 == _is_maximal_inexact_type(dpt.float64, fp16, fp64)
-    assert fp64 == _is_maximal_inexact_type(dpt.complex128, fp16, fp64)
-    assert fp64 != _is_maximal_inexact_type(dpt.float32, fp16, fp64)
-    assert fp64 != _is_maximal_inexact_type(dpt.complex64, fp16, fp64)
-
-
-def test_can_cast_device():
-    assert _can_cast(dpt.int64, dpt.float64, True, True)
-    # if f8 is available, can't cast i8 to f4
-    assert not _can_cast(dpt.int64, dpt.float32, True, True)
-    assert not _can_cast(dpt.int64, dpt.float32, False, True)
-    # should be able to cast to f8 when f2 unavailable
-    assert _can_cast(dpt.int64, dpt.float64, False, True)
-    # casting to f4 acceptable when f8 unavailable
-    assert _can_cast(dpt.int64, dpt.float32, True, False)
-    assert _can_cast(dpt.int64, dpt.float32, False, False)
-    # can't safely cast inexact type to inexact type of lesser precision
-    assert not _can_cast(dpt.float32, dpt.float16, True, False)
-    assert not _can_cast(dpt.float64, dpt.float32, False, True)
diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py
index e285dcb35f..4ab28db479 100644
--- a/dpctl/tests/test_usm_ndarray_ctor.py
+++ b/dpctl/tests/test_usm_ndarray_ctor.py
@@ -84,7 +84,7 @@ def test_usm_ndarray_flags():
     assert f.forc
     assert f.fnc
 
-    f = dpt.usm_ndarray((5, 1, 1), dtype="i4", strides=(1, 0, 1)).flags
+    f = dpt.usm_ndarray((5, 0, 1), dtype="i4", strides=(1, 0, 1)).flags
     assert f.fc
     assert f.forc
     assert not dpt.usm_ndarray(
@@ -109,6 +109,25 @@ def test_usm_ndarray_flags():
         x.flags["C"] = False
 
 
+def test_usm_ndarray_flags_bug_gh_1334():
+    get_queue_or_skip()
+    a = dpt.ones((2, 3), dtype="u4")
+    r = dpt.reshape(a, (1, 6, 1))
+    assert r.flags["C"] and r.flags["F"]
+
+    a = dpt.ones((2, 3), dtype="u4", order="F")
+    r = dpt.reshape(a, (1, 6, 1), order="F")
+    assert r.flags["C"] and r.flags["F"]
+
+    a = dpt.ones((2, 3, 4), dtype="i8")
+    r = dpt.sum(a, axis=(1, 2), keepdims=True)
+    assert r.flags["C"] and r.flags["F"]
+
+    a = dpt.ones((2, 1), dtype="?")
+    r = a[:, 1::-1]
+    assert r.flags["F"] and r.flags["C"]
+
+
 @pytest.mark.parametrize(
     "dtype",
     [
@@ -1012,6 +1031,53 @@ def test_setitem_same_dtype(dtype, src_usm_type, dst_usm_type):
     Zusm_empty[Ellipsis] = Zusm_3d[0, 0, 0:0]
 
 
+def test_setitem_broadcasting():
+    get_queue_or_skip()
+    dst = dpt.ones((2, 3, 4), dtype="u4")
+    src = dpt.zeros((3, 1), dtype=dst.dtype)
+    dst[...] = src
+    expected = np.zeros(dst.shape, dtype=dst.dtype)
+    assert np.array_equal(dpt.asnumpy(dst), expected)
+
+
+def test_setitem_broadcasting_empty_dst_validation():
+    "Broadcasting rules apply, except exception"
+    get_queue_or_skip()
+    dst = dpt.ones((2, 0, 5, 4), dtype="i8")
+    src = dpt.ones((2, 0, 3, 4), dtype="i8")
+    with pytest.raises(ValueError):
+        dst[...] = src
+
+
+def test_setitem_broadcasting_empty_dst_edge_case():
+    """RHS is shunken to empty array by
+    broadasting rule, hence no exception"""
+    get_queue_or_skip()
+    dst = dpt.ones(1, dtype="i8")[0:0]
+    src = dpt.ones(tuple(), dtype="i8")
+    dst[...] = src
+
+
+def test_setitem_broadcasting_src_ndim_equal_dst_ndim():
+    get_queue_or_skip()
+    dst = dpt.ones((2, 3, 4), dtype="i4")
+    src = dpt.zeros((2, 1, 4), dtype="i4")
+    dst[...] = src
+
+    expected = np.zeros(dst.shape, dtype=dst.dtype)
+    assert np.array_equal(dpt.asnumpy(dst), expected)
+
+
+def test_setitem_broadcasting_src_ndim_greater_than_dst_ndim():
+    get_queue_or_skip()
+    dst = dpt.ones((2, 3, 4), dtype="i4")
+    src = dpt.zeros((1, 2, 1, 4), dtype="i4")
+    dst[...] = src
+
+    expected = np.zeros(dst.shape, dtype=dst.dtype)
+    assert np.array_equal(dpt.asnumpy(dst), expected)
+
+
 @pytest.mark.parametrize(
     "dtype",
     _all_dtypes,
diff --git a/dpctl/tests/test_usm_ndarray_indexing.py b/dpctl/tests/test_usm_ndarray_indexing.py
index 87d89a1b8d..9d166226e7 100644
--- a/dpctl/tests/test_usm_ndarray_indexing.py
+++ b/dpctl/tests/test_usm_ndarray_indexing.py
@@ -22,6 +22,7 @@
 
 import dpctl
 import dpctl.tensor as dpt
+import dpctl.tensor._tensor_impl as ti
 from dpctl.utils import ExecutionPlacementError
 
 _all_dtypes = [
@@ -1353,7 +1354,7 @@ def test_nonzero_dtype():
     x = dpt.ones((3, 4))
     idx, idy = dpt.nonzero(x)
     # create array using device's
-    # default integral data type
-    ref = dpt.arange(8)
-    assert idx.dtype == ref.dtype
-    assert idy.dtype == ref.dtype
+    # default index data type
+    index_dt = dpt.dtype(ti.default_device_index_type(x.sycl_queue))
+    assert idx.dtype == index_dt
+    assert idy.dtype == index_dt
diff --git a/dpctl/tests/test_usm_ndarray_operators.py b/dpctl/tests/test_usm_ndarray_operators.py
index 92dc38bd47..def22562c0 100644
--- a/dpctl/tests/test_usm_ndarray_operators.py
+++ b/dpctl/tests/test_usm_ndarray_operators.py
@@ -47,7 +47,7 @@ def multiply(a, b):
             return b
 
 
-@pytest.mark.parametrize("namespace", [None, Dummy()])
+@pytest.mark.parametrize("namespace", [dpt, Dummy()])
 def test_fp_ops(namespace):
     try:
         X = dpt.ones(1)
@@ -81,7 +81,7 @@ def test_fp_ops(namespace):
     X.__ifloordiv__(1.0)
 
 
-@pytest.mark.parametrize("namespace", [None, Dummy()])
+@pytest.mark.parametrize("namespace", [dpt, Dummy()])
 def test_int_ops(namespace):
     try:
         X = dpt.usm_ndarray(1, "i4")
@@ -113,7 +113,7 @@ def test_int_ops(namespace):
     X.__ipow__(2)
 
 
-@pytest.mark.parametrize("namespace", [None, Dummy()])
+@pytest.mark.parametrize("namespace", [dpt, Dummy()])
 def test_mat_ops(namespace):
     try:
         M = dpt.eye(3, 3)
diff --git a/libsyclinterface/include/dpctl_sycl_context_interface.h b/libsyclinterface/include/dpctl_sycl_context_interface.h
index 6ca3c92d05..ddf80e1e67 100644
--- a/libsyclinterface/include/dpctl_sycl_context_interface.h
+++ b/libsyclinterface/include/dpctl_sycl_context_interface.h
@@ -159,6 +159,6 @@ void DPCTLContext_Delete(__dpctl_take DPCTLSyclContextRef CtxRef);
  * @ingroup ContextInterface
  */
 DPCTL_API
-size_t DPCTLContext_Hash(__dpctl_take DPCTLSyclContextRef CtxRef);
+size_t DPCTLContext_Hash(__dpctl_keep DPCTLSyclContextRef CtxRef);
 
 DPCTL_C_EXTERN_C_END
diff --git a/libsyclinterface/include/dpctl_sycl_platform_interface.h b/libsyclinterface/include/dpctl_sycl_platform_interface.h
index 3e40453e4a..90dab58f1c 100644
--- a/libsyclinterface/include/dpctl_sycl_platform_interface.h
+++ b/libsyclinterface/include/dpctl_sycl_platform_interface.h
@@ -39,6 +39,19 @@ DPCTL_C_EXTERN_C_BEGIN
  * @defgroup PlatformInterface Platform class C wrapper
  */
 
+/*!
+ * @brief Checks if two DPCTLSyclPlatformRef objects point to the same
+ * sycl::platform.
+ *
+ * @param    PRef1         First opaque pointer to a ``sycl::platform``.
+ * @param    PRef2         Second opaque pointer to a ``sycl::platform``.
+ * @return   True if the underlying sycl::platform are same, false otherwise.
+ * @ingroup PlatformInterface
+ */
+DPCTL_API
+bool DPCTLPlatform_AreEq(__dpctl_keep const DPCTLSyclPlatformRef PRef1,
+                         __dpctl_keep const DPCTLSyclPlatformRef PRef2);
+
 /*!
  * @brief Returns a copy of the DPCTLSyclPlatformRef object.
  *
@@ -155,4 +168,14 @@ DPCTL_API
 __dpctl_give DPCTLSyclContextRef
 DPCTLPlatform_GetDefaultContext(__dpctl_keep const DPCTLSyclPlatformRef PRef);
 
+/*!
+ * @brief Wrapper over std::hash<sycl::platform>'s operator()
+ *
+ * @param    PRef        The DPCTLSyclPlatformRef pointer.
+ * @return   Hash value of the underlying ``sycl::platform`` instance.
+ * @ingroup PlatformInterface
+ */
+DPCTL_API
+size_t DPCTLPlatform_Hash(__dpctl_keep DPCTLSyclPlatformRef CtxRef);
+
 DPCTL_C_EXTERN_C_END
diff --git a/libsyclinterface/source/dpctl_sycl_platform_interface.cpp b/libsyclinterface/source/dpctl_sycl_platform_interface.cpp
index 2083259611..5be98b7b61 100644
--- a/libsyclinterface/source/dpctl_sycl_platform_interface.cpp
+++ b/libsyclinterface/source/dpctl_sycl_platform_interface.cpp
@@ -234,3 +234,27 @@ DPCTLPlatform_GetDefaultContext(__dpctl_keep const DPCTLSyclPlatformRef PRef)
         return nullptr;
     }
 }
+
+bool DPCTLPlatform_AreEq(__dpctl_keep const DPCTLSyclPlatformRef PRef1,
+                         __dpctl_keep const DPCTLSyclPlatformRef PRef2)
+{
+    auto P1 = unwrap<platform>(PRef1);
+    auto P2 = unwrap<platform>(PRef2);
+    if (P1 && P2)
+        return *P1 == *P2;
+    else
+        return false;
+}
+
+size_t DPCTLPlatform_Hash(__dpctl_keep const DPCTLSyclPlatformRef PRef)
+{
+    if (PRef) {
+        auto P = unwrap<platform>(PRef);
+        std::hash<platform> hash_fn;
+        return hash_fn(*P);
+    }
+    else {
+        error_handler("Argument PRef is null.", __FILE__, __func__, __LINE__);
+        return 0;
+    }
+}
diff --git a/libsyclinterface/tests/test_sycl_platform_interface.cpp b/libsyclinterface/tests/test_sycl_platform_interface.cpp
index e6fb7df134..f04cead0e1 100644
--- a/libsyclinterface/tests/test_sycl_platform_interface.cpp
+++ b/libsyclinterface/tests/test_sycl_platform_interface.cpp
@@ -264,6 +264,25 @@ TEST_P(TestDPCTLSyclPlatformInterface, ChkPrintInfoNullArg)
     EXPECT_NO_FATAL_FAILURE(DPCTLPlatformMgr_PrintInfo(Null_PRef, 0));
 }
 
+TEST_P(TestDPCTLSyclPlatformInterface, ChkAreEq)
+{
+    DPCTLSyclPlatformRef PRef_Copy = nullptr;
+
+    EXPECT_NO_FATAL_FAILURE(PRef_Copy = DPCTLPlatform_Copy(PRef));
+
+    ASSERT_TRUE(DPCTLPlatform_AreEq(PRef, PRef_Copy));
+    EXPECT_TRUE(DPCTLPlatform_Hash(PRef) == DPCTLPlatform_Hash(PRef_Copy));
+
+    EXPECT_NO_FATAL_FAILURE(DPCTLPlatform_Delete(PRef_Copy));
+}
+
+TEST_P(TestDPCTLSyclPlatformInterface, ChkAreEqNullArg)
+{
+    DPCTLSyclPlatformRef Null_PRef = nullptr;
+    ASSERT_FALSE(DPCTLPlatform_AreEq(PRef, Null_PRef));
+    ASSERT_TRUE(DPCTLPlatform_Hash(Null_PRef) == 0);
+}
+
 TEST_F(TestDPCTLSyclDefaultPlatform, ChkGetName)
 {
     check_platform_name(PRef);