diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py
index 0134998f7a..f1ec439f3a 100644
--- a/dpctl/tensor/__init__.py
+++ b/dpctl/tensor/__init__.py
@@ -135,6 +135,8 @@
     logical_not,
     logical_or,
     logical_xor,
+    maximum,
+    minimum,
     multiply,
     negative,
     not_equal,
@@ -274,6 +276,8 @@
     "log1p",
     "log2",
     "log10",
+    "maximum",
+    "minimum",
     "multiply",
     "negative",
     "not_equal",
diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py
index 0ebe874df2..fe85a183ba 100644
--- a/dpctl/tensor/_elementwise_funcs.py
+++ b/dpctl/tensor/_elementwise_funcs.py
@@ -1176,6 +1176,66 @@
     _logical_xor_docstring_,
 )
 
+# B??: ==== MAXIMUM    (x1, x2)
+_maximum_docstring_ = """
+maximum(x1, x2, out=None, order='K')
+
+Compares two input arrays `x1` and `x2` and returns
+a new array containing the element-wise maxima.
+
+Args:
+    x1 (usm_ndarray):
+        First input array, expected to have numeric data type.
+    x2 (usm_ndarray):
+        Second input array, also expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the element-wise maxima. The data type of
+        the returned array is determined by the Type Promotion Rules.
+"""
+maximum = BinaryElementwiseFunc(
+    "maximum",
+    ti._maximum_result_type,
+    ti._maximum,
+    _maximum_docstring_,
+)
+
+# B??: ==== MINIMUM    (x1, x2)
+_minimum_docstring_ = """
+minimum(x1, x2, out=None, order='K')
+
+Compares two input arrays `x1` and `x2` and returns
+a new array containing the element-wise minima.
+
+Args:
+    x1 (usm_ndarray):
+        First input array, expected to have numeric data type.
+    x2 (usm_ndarray):
+        Second input array, also expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the element-wise minima. The data type of
+        the returned array is determined by the Type Promotion Rules.
+"""
+minimum = BinaryElementwiseFunc(
+    "minimum",
+    ti._minimum_result_type,
+    ti._minimum,
+    _minimum_docstring_,
+)
+
 # B19: ==== MULTIPLY    (x1, x2)
 _multiply_docstring_ = """
 multiply(x1, x2, out=None, order='K')
@@ -1369,6 +1429,12 @@
         First input array, expected to have a real-valued data type.
     x2 (usm_ndarray):
         Second input array, also expected to have a real-valued data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter `out` is `None`.
+        Default: "K".
 Returns:
     usm_ndarray:
         an array containing the element-wise remainders. The data type of
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp
new file mode 100644
index 0000000000..6d12477f66
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp
@@ -0,0 +1,286 @@
+//=== maximum.hpp -   Binary function MAXIMUM            ------  *-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of MAXIMUM(x1, x2)
+/// function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace maximum
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace tu_ns = dpctl::tensor::type_utils;
+
+template <typename argT1, typename argT2, typename resT> struct MaximumFunctor
+{
+    // NaN in either operand propagates to the result (first operand preferred)
+    using supports_sg_loadstore = std::negation<
+        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_vec = std::conjunction<
+        std::is_same<argT1, argT2>,
+        std::negation<std::disjunction<tu_ns::is_complex<argT1>,
+                                       tu_ns::is_complex<argT2>>>>;
+
+    resT operator()(const argT1 &in1, const argT2 &in2)
+    {
+        if constexpr (tu_ns::is_complex<argT1>::value ||
+                      tu_ns::is_complex<argT2>::value)
+        {
+            static_assert(std::is_same_v<argT1, argT2>);
+            using realT = typename argT1::value_type;
+            const realT real1 = std::real(in1), imag1 = std::imag(in1);
+            const realT real2 = std::real(in2), imag2 = std::imag(in2);
+            // compare real parts first; imaginary parts break ties
+            bool gt = (real1 == real2) ? (imag1 > imag2)
+                                       : (real1 > real2 && !std::isnan(imag1) &&
+                                          !std::isnan(imag2));
+            return (std::isnan(real1) || std::isnan(imag1) || gt) ? in1 : in2;
+        }
+        else if constexpr (std::is_floating_point_v<argT1> ||
+                           std::is_same_v<argT1, sycl::half>)
+            return (std::isnan(in1) || in1 > in2) ? in1 : in2;
+        else
+            return (in1 > in2) ? in1 : in2;
+    }
+
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT1, vec_sz> &in1,
+                                       const sycl::vec<argT2, vec_sz> &in2)
+    {
+        sycl::vec<resT, vec_sz> res;
+#pragma unroll
+        for (int i = 0; i < vec_sz; ++i) {
+            // sycl::half needs the NaN test too, as in the scalar overload
+            if constexpr (std::is_floating_point_v<argT1> ||
+                          std::is_same_v<argT1, sycl::half>)
+                res[i] = (sycl::isnan(in1[i]) || in1[i] > in2[i]) ? in1[i]
+                                                                  : in2[i];
+            else
+                res[i] = (in1[i] > in2[i]) ? in1[i] : in2[i];
+        }
+        return res;
+    }
+};
+
+/*! @brief MaximumFunctor bound to the contiguous-array kernel functor.
+ *
+ *  Only forwards its arguments to elementwise_common::BinaryContigFunctor;
+ *  vec_sz and n_vecs default to 4 and 2.
+ */
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using MaximumContigFunctor = elementwise_common::BinaryContigFunctor<
+    argT1, argT2, resT, MaximumFunctor<argT1, argT2, resT>, vec_sz, n_vecs>;
+
+/*! @brief MaximumFunctor bound to the strided-array kernel functor;
+ *  IndexerT is passed through to elementwise_common::BinaryStridedFunctor
+ *  unchanged.
+ */
+template <typename argT1, typename argT2, typename resT, typename IndexerT>
+using MaximumStridedFunctor = elementwise_common::BinaryStridedFunctor<
+    argT1, argT2, resT, IndexerT, MaximumFunctor<argT1, argT2, resT>>;
+
+template <typename T1, typename T2> struct MaximumOutputType
+{
+    using value_type = typename std::disjunction< // first entry whose ::value
+                                                  // is true supplies the type
+        td_ns::BinaryTypeMapResultEntry<T1, bool, T2, bool, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint8_t,
+                                        T2,
+                                        std::uint8_t,
+                                        std::uint8_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int8_t,
+                                        T2,
+                                        std::int8_t,
+                                        std::int8_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint16_t,
+                                        T2,
+                                        std::uint16_t,
+                                        std::uint16_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int16_t,
+                                        T2,
+                                        std::int16_t,
+                                        std::int16_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint32_t,
+                                        T2,
+                                        std::uint32_t,
+                                        std::uint32_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int32_t,
+                                        T2,
+                                        std::int32_t,
+                                        std::int32_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint64_t,
+                                        T2,
+                                        std::uint64_t,
+                                        std::uint64_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int64_t,
+                                        T2,
+                                        std::int64_t,
+                                        std::int64_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        sycl::half,
+                                        T2,
+                                        sycl::half,
+                                        sycl::half>,
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, float>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, double>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        std::complex<float>>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type; // used if none matches
+};
+
+/*! @brief Declaration only: a template argument for binary_contig_impl. */
+template <typename argT1, typename argT2, typename resT,
+          unsigned int vec_sz, unsigned int n_vecs>
+class maximum_contig_kernel;
+
+/*! @brief MAXIMUM for contiguous inputs and output.
+ *
+ *  Thin wrapper: forwards every argument unchanged to
+ *  elementwise_common::binary_contig_impl.
+ */
+template <typename argTy1, typename argTy2>
+sycl::event maximum_contig_impl(sycl::queue exec_q,
+                                size_t nelems,
+                                const char *arg1_p, py::ssize_t arg1_offset,
+                                const char *arg2_p, py::ssize_t arg2_offset,
+                                char *res_p, py::ssize_t res_offset,
+                                const std::vector<sycl::event> &depends = {})
+{
+    return elementwise_common::binary_contig_impl<
+        argTy1, argTy2, MaximumOutputType, MaximumContigFunctor,
+        maximum_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p,
+                               arg2_offset, res_p, res_offset, depends);
+}
+
+template <typename fnT, typename T1, typename T2> struct MaximumContigFactory
+{
+    /*! @brief Contiguous MAXIMUM implementation for (T1, T2), or nullptr
+     *  when MaximumOutputType maps that pair of types to void. */
+    fnT get()
+    {
+        using resT = typename MaximumOutputType<T1, T2>::value_type;
+        if constexpr (std::is_same_v<resT, void>) {
+            return nullptr;
+        }
+        else {
+            // instantiated only for type pairs that have a result type
+            return maximum_contig_impl<T1, T2>;
+        }
+    }
+};
+
+template <typename fnT, typename T1, typename T2> struct MaximumTypeMapFactory
+{
+    /*! @brief Type id, as given by td_ns::GetTypeid, of the result type that
+     *  MaximumOutputType assigns to maximum(T1 x, T2 y). */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resT = typename MaximumOutputType<T1, T2>::value_type;
+        return td_ns::GetTypeid<resT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename resT, typename IndexerT>
+class maximum_strided_kernel;
+
+/*! @brief MAXIMUM for general strided arrays.
+ *  Thin wrapper: forwards every argument unchanged to
+ *  elementwise_common::binary_strided_impl.
+ */
+template <typename argTy1, typename argTy2>
+sycl::event
+maximum_strided_impl(sycl::queue exec_q,
+                     size_t nelems,
+                     int nd, const py::ssize_t *shape_and_strides,
+                     const char *arg1_p, py::ssize_t arg1_offset,
+                     const char *arg2_p, py::ssize_t arg2_offset,
+                     char *res_p, py::ssize_t res_offset,
+                     const std::vector<sycl::event> &depends,
+                     const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::binary_strided_impl<
+        argTy1, argTy2, MaximumOutputType, MaximumStridedFunctor,
+        maximum_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p,
+                                arg1_offset, arg2_p, arg2_offset, res_p,
+                                res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T1, typename T2> struct MaximumStridedFactory
+{
+    /*! @brief Strided MAXIMUM implementation for (T1, T2), or nullptr
+     *  when MaximumOutputType maps that pair of types to void. */
+    fnT get()
+    {
+        using resT = typename MaximumOutputType<T1, T2>::value_type;
+        if constexpr (std::is_same_v<resT, void>) {
+            return nullptr;
+        }
+        else {
+            // instantiated only for type pairs that have a result type
+            return maximum_strided_impl<T1, T2>;
+        }
+    }
+};
+
+} // namespace maximum
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp
new file mode 100644
index 0000000000..baddbe388d
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp
@@ -0,0 +1,286 @@
+//=== minimum.hpp -   Binary function MINIMUM           ------  *-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of MINIMUM(x1, x2)
+/// function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace minimum
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace tu_ns = dpctl::tensor::type_utils;
+
+template <typename argT1, typename argT2, typename resT> struct MinimumFunctor
+{
+    // NaN in either operand propagates to the result (first operand preferred)
+    using supports_sg_loadstore = std::negation<
+        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_vec = std::conjunction<
+        std::is_same<argT1, argT2>,
+        std::negation<std::disjunction<tu_ns::is_complex<argT1>,
+                                       tu_ns::is_complex<argT2>>>>;
+
+    resT operator()(const argT1 &in1, const argT2 &in2)
+    {
+        if constexpr (tu_ns::is_complex<argT1>::value ||
+                      tu_ns::is_complex<argT2>::value)
+        {
+            static_assert(std::is_same_v<argT1, argT2>);
+            using realT = typename argT1::value_type;
+            const realT real1 = std::real(in1), imag1 = std::imag(in1);
+            const realT real2 = std::real(in2), imag2 = std::imag(in2);
+            // compare real parts first; imaginary parts break ties
+            bool lt = (real1 == real2) ? (imag1 < imag2)
+                                       : (real1 < real2 && !std::isnan(imag1) &&
+                                          !std::isnan(imag2));
+            return (std::isnan(real1) || std::isnan(imag1) || lt) ? in1 : in2;
+        }
+        else if constexpr (std::is_floating_point_v<argT1> ||
+                           std::is_same_v<argT1, sycl::half>)
+            return (std::isnan(in1) || in1 < in2) ? in1 : in2;
+        else
+            return (in1 < in2) ? in1 : in2;
+    }
+
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT1, vec_sz> &in1,
+                                       const sycl::vec<argT2, vec_sz> &in2)
+    {
+        sycl::vec<resT, vec_sz> res;
+#pragma unroll
+        for (int i = 0; i < vec_sz; ++i) {
+            // sycl::half needs the NaN test too, as in the scalar overload
+            if constexpr (std::is_floating_point_v<argT1> ||
+                          std::is_same_v<argT1, sycl::half>)
+                res[i] = (sycl::isnan(in1[i]) || in1[i] < in2[i]) ? in1[i]
+                                                                  : in2[i];
+            else
+                res[i] = (in1[i] < in2[i]) ? in1[i] : in2[i];
+        }
+        return res;
+    }
+};
+
+/*! @brief MinimumFunctor bound to the contiguous-array kernel functor.
+ *
+ *  Only forwards its arguments to elementwise_common::BinaryContigFunctor;
+ *  vec_sz and n_vecs default to 4 and 2.
+ */
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using MinimumContigFunctor = elementwise_common::BinaryContigFunctor<
+    argT1, argT2, resT, MinimumFunctor<argT1, argT2, resT>, vec_sz, n_vecs>;
+
+/*! @brief MinimumFunctor bound to the strided-array kernel functor;
+ *  IndexerT is passed through to elementwise_common::BinaryStridedFunctor
+ *  unchanged.
+ */
+template <typename argT1, typename argT2, typename resT, typename IndexerT>
+using MinimumStridedFunctor = elementwise_common::BinaryStridedFunctor<
+    argT1, argT2, resT, IndexerT, MinimumFunctor<argT1, argT2, resT>>;
+
+template <typename T1, typename T2> struct MinimumOutputType
+{
+    using value_type = typename std::disjunction< // first entry whose ::value
+                                                  // is true supplies the type
+        td_ns::BinaryTypeMapResultEntry<T1, bool, T2, bool, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint8_t,
+                                        T2,
+                                        std::uint8_t,
+                                        std::uint8_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int8_t,
+                                        T2,
+                                        std::int8_t,
+                                        std::int8_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint16_t,
+                                        T2,
+                                        std::uint16_t,
+                                        std::uint16_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int16_t,
+                                        T2,
+                                        std::int16_t,
+                                        std::int16_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint32_t,
+                                        T2,
+                                        std::uint32_t,
+                                        std::uint32_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int32_t,
+                                        T2,
+                                        std::int32_t,
+                                        std::int32_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint64_t,
+                                        T2,
+                                        std::uint64_t,
+                                        std::uint64_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::int64_t,
+                                        T2,
+                                        std::int64_t,
+                                        std::int64_t>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        sycl::half,
+                                        T2,
+                                        sycl::half,
+                                        sycl::half>,
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, float>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, double>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        std::complex<float>>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type; // used if none matches
+};
+
+/*! @brief Declaration only: a template argument for binary_contig_impl. */
+template <typename argT1, typename argT2, typename resT,
+          unsigned int vec_sz, unsigned int n_vecs>
+class minimum_contig_kernel;
+
+/*! @brief MINIMUM for contiguous inputs and output.
+ *
+ *  Thin wrapper: forwards every argument unchanged to
+ *  elementwise_common::binary_contig_impl.
+ */
+template <typename argTy1, typename argTy2>
+sycl::event minimum_contig_impl(sycl::queue exec_q,
+                                size_t nelems,
+                                const char *arg1_p, py::ssize_t arg1_offset,
+                                const char *arg2_p, py::ssize_t arg2_offset,
+                                char *res_p, py::ssize_t res_offset,
+                                const std::vector<sycl::event> &depends = {})
+{
+    return elementwise_common::binary_contig_impl<
+        argTy1, argTy2, MinimumOutputType, MinimumContigFunctor,
+        minimum_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p,
+                               arg2_offset, res_p, res_offset, depends);
+}
+
+template <typename fnT, typename T1, typename T2> struct MinimumContigFactory
+{
+    /*! @brief Contiguous MINIMUM implementation for (T1, T2), or nullptr
+     *  when MinimumOutputType maps that pair of types to void. */
+    fnT get()
+    {
+        using resT = typename MinimumOutputType<T1, T2>::value_type;
+        if constexpr (std::is_same_v<resT, void>) {
+            return nullptr;
+        }
+        else {
+            // instantiated only for type pairs that have a result type
+            return minimum_contig_impl<T1, T2>;
+        }
+    }
+};
+
+template <typename fnT, typename T1, typename T2> struct MinimumTypeMapFactory
+{
+    /*! @brief Type id, as given by td_ns::GetTypeid, of the result type that
+     *  MinimumOutputType assigns to minimum(T1 x, T2 y). */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resT = typename MinimumOutputType<T1, T2>::value_type;
+        return td_ns::GetTypeid<resT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename resT, typename IndexerT>
+class minimum_strided_kernel;
+
+/*! @brief MINIMUM for general strided arrays.
+ *  Thin wrapper: forwards every argument unchanged to
+ *  elementwise_common::binary_strided_impl.
+ */
+template <typename argTy1, typename argTy2>
+sycl::event
+minimum_strided_impl(sycl::queue exec_q,
+                     size_t nelems,
+                     int nd, const py::ssize_t *shape_and_strides,
+                     const char *arg1_p, py::ssize_t arg1_offset,
+                     const char *arg2_p, py::ssize_t arg2_offset,
+                     char *res_p, py::ssize_t res_offset,
+                     const std::vector<sycl::event> &depends,
+                     const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::binary_strided_impl<
+        argTy1, argTy2, MinimumOutputType, MinimumStridedFunctor,
+        minimum_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p,
+                                arg1_offset, arg2_p, arg2_offset, res_p,
+                                res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T1, typename T2> struct MinimumStridedFactory
+{
+    /*! @brief Strided MINIMUM implementation for (T1, T2), or nullptr
+     *  when MinimumOutputType maps that pair of types to void. */
+    fnT get()
+    {
+        using resT = typename MinimumOutputType<T1, T2>::value_type;
+        if constexpr (std::is_same_v<resT, void>) {
+            return nullptr;
+        }
+        else {
+            // instantiated only for type pairs that have a result type
+            return minimum_strided_impl<T1, T2>;
+        }
+    }
+};
+
+} // namespace minimum
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp
index 22eb85eea9..cc95cecb38 100644
--- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp
+++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp
@@ -74,6 +74,8 @@
 #include "kernels/elementwise_functions/logical_not.hpp"
 #include "kernels/elementwise_functions/logical_or.hpp"
 #include "kernels/elementwise_functions/logical_xor.hpp"
+#include "kernels/elementwise_functions/maximum.hpp"
+#include "kernels/elementwise_functions/minimum.hpp"
 #include "kernels/elementwise_functions/multiply.hpp"
 #include "kernels/elementwise_functions/negative.hpp"
 #include "kernels/elementwise_functions/not_equal.hpp"
@@ -1794,6 +1796,86 @@ void populate_logical_xor_dispatch_tables(void)
 };
 } // namespace impl
 
+// B??: ==== MAXIMUM    (x1, x2)
+namespace impl
+{
+
+namespace maximum_fn_ns = dpctl::tensor::kernels::maximum;
+
+// lookup tables filled in by populate_maximum_dispatch_tables() below
+static binary_contig_impl_fn_ptr_t
+    maximum_contig_dispatch_table[td_ns::num_types][td_ns::num_types];
+static binary_strided_impl_fn_ptr_t
+    maximum_strided_dispatch_table[td_ns::num_types][td_ns::num_types];
+static int maximum_output_id_table[td_ns::num_types][td_ns::num_types];
+
+/*! @brief Fills the three MAXIMUM tables above, one DispatchTableBuilder
+ *  per table. Invoked from init_elementwise_functions().
+ */
+void populate_maximum_dispatch_tables(void)
+{
+    using namespace td_ns;
+
+    // result type id for each pair of input types
+    DispatchTableBuilder<int, maximum_fn_ns::MaximumTypeMapFactory, num_types>
+        output_id_builder;
+    output_id_builder.populate_dispatch_table(maximum_output_id_table);
+
+    // implementations for general strided arrays
+    DispatchTableBuilder<binary_strided_impl_fn_ptr_t,
+                         maximum_fn_ns::MaximumStridedFactory, num_types>
+        strided_builder;
+    strided_builder.populate_dispatch_table(maximum_strided_dispatch_table);
+
+    // implementations for contiguous inputs and output
+    DispatchTableBuilder<binary_contig_impl_fn_ptr_t,
+                         maximum_fn_ns::MaximumContigFactory, num_types>
+        contig_builder;
+    contig_builder.populate_dispatch_table(maximum_contig_dispatch_table);
+}
+
+} // namespace impl
+
+// B??: ==== MINIMUM    (x1, x2)
+namespace impl
+{
+
+namespace minimum_fn_ns = dpctl::tensor::kernels::minimum;
+
+// lookup tables filled in by populate_minimum_dispatch_tables() below
+static binary_contig_impl_fn_ptr_t
+    minimum_contig_dispatch_table[td_ns::num_types][td_ns::num_types];
+static binary_strided_impl_fn_ptr_t
+    minimum_strided_dispatch_table[td_ns::num_types][td_ns::num_types];
+static int minimum_output_id_table[td_ns::num_types][td_ns::num_types];
+
+/*! @brief Fills the three MINIMUM tables above, one DispatchTableBuilder
+ *  per table.
+ */
+void populate_minimum_dispatch_tables(void)
+{
+    using namespace td_ns;
+
+    // result type id for each pair of input types
+    DispatchTableBuilder<int, minimum_fn_ns::MinimumTypeMapFactory, num_types>
+        output_id_builder;
+    output_id_builder.populate_dispatch_table(minimum_output_id_table);
+
+    // implementations for general strided arrays
+    DispatchTableBuilder<binary_strided_impl_fn_ptr_t,
+                         minimum_fn_ns::MinimumStridedFactory, num_types>
+        strided_builder;
+    strided_builder.populate_dispatch_table(minimum_strided_dispatch_table);
+
+    // implementations for contiguous inputs and output
+    DispatchTableBuilder<binary_contig_impl_fn_ptr_t,
+                         minimum_fn_ns::MinimumContigFactory, num_types>
+        contig_builder;
+    contig_builder.populate_dispatch_table(minimum_contig_dispatch_table);
+}
+
+} // namespace impl
+
 // B19: ==== MULTIPLY    (x1, x2)
 namespace impl
 {
@@ -3965,6 +4047,86 @@ void init_elementwise_functions(py::module_ m)
         m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, "");
     }
 
+    // B??: ==== MAXIMUM    (x1, x2)
+    {
+        impl::populate_maximum_dispatch_tables();
+        using impl::maximum_contig_dispatch_table;
+        using impl::maximum_output_id_table;
+        using impl::maximum_strided_dispatch_table;
+
+        auto maximum_pyapi = [&](dpctl::tensor::usm_ndarray src1,
+                                 dpctl::tensor::usm_ndarray src2,
+                                 dpctl::tensor::usm_ndarray dst,
+                                 sycl::queue exec_q,
+                                 const std::vector<sycl::event> &depends = {}) {
+            return py_binary_ufunc(
+                src1, src2, dst, exec_q, depends, maximum_output_id_table,
+                // function pointers to handle operation on contiguous
+                // arrays (pointers may be nullptr)
+                maximum_contig_dispatch_table,
+                // function pointers to handle operation on strided arrays
+                // (most general case)
+                maximum_strided_dispatch_table,
+                // function pointers to handle operation of c-contig matrix
+                // and c-contig row with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{},
+                // function pointers to handle operation of c-contig row
+                // and c-contig matrix with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{});
+        };
+        auto maximum_result_type_pyapi = [&](py::dtype dtype1,
+                                             py::dtype dtype2) {
+            return py_binary_ufunc_result_type(dtype1, dtype2,
+                                               maximum_output_id_table);
+        };
+        m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"),
+              py::arg("dst"), py::arg("sycl_queue"),
+              py::arg("depends") = py::list());
+        m.def("_maximum_result_type", maximum_result_type_pyapi, "");
+    }
+
+    // B??: ==== MINIMUM    (x1, x2)
+    {
+        impl::populate_minimum_dispatch_tables();
+        using impl::minimum_contig_dispatch_table;
+        using impl::minimum_output_id_table;
+        using impl::minimum_strided_dispatch_table;
+
+        auto minimum_pyapi = [&](dpctl::tensor::usm_ndarray src1,
+                                 dpctl::tensor::usm_ndarray src2,
+                                 dpctl::tensor::usm_ndarray dst,
+                                 sycl::queue exec_q,
+                                 const std::vector<sycl::event> &depends = {}) {
+            return py_binary_ufunc(
+                src1, src2, dst, exec_q, depends, minimum_output_id_table,
+                // function pointers to handle operation on contiguous
+                // arrays (pointers may be nullptr)
+                minimum_contig_dispatch_table,
+                // function pointers to handle operation on strided arrays
+                // (most general case)
+                minimum_strided_dispatch_table,
+                // function pointers to handle operation of c-contig matrix
+                // and c-contig row with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{},
+                // function pointers to handle operation of c-contig row and
+                // c-contig matrix with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{});
+        };
+        auto minimum_result_type_pyapi = [&](py::dtype dtype1,
+                                             py::dtype dtype2) {
+            return py_binary_ufunc_result_type(dtype1, dtype2,
+                                               minimum_output_id_table);
+        };
+        m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"),
+              py::arg("dst"), py::arg("sycl_queue"),
+              py::arg("depends") = py::list());
+        m.def("_minimum_result_type", minimum_result_type_pyapi, "");
+    }
+
     // B19: ==== MULTIPLY    (x1, x2)
     {
         impl::populate_multiply_dispatch_tables();
diff --git a/dpctl/tests/elementwise/test_greater.py b/dpctl/tests/elementwise/test_greater.py
index fbda074e53..97915411ea 100644
--- a/dpctl/tests/elementwise/test_greater.py
+++ b/dpctl/tests/elementwise/test_greater.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Ungreater required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_greater_equal.py b/dpctl/tests/elementwise/test_greater_equal.py
index 3f56e5d460..d188a852ea 100644
--- a/dpctl/tests/elementwise/test_greater_equal.py
+++ b/dpctl/tests/elementwise/test_greater_equal.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Ungreater_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_less_equal.py b/dpctl/tests/elementwise/test_less_equal.py
index b539d6a48f..1949b9d91f 100644
--- a/dpctl/tests/elementwise/test_less_equal.py
+++ b/dpctl/tests/elementwise/test_less_equal.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_logical_and.py b/dpctl/tests/elementwise/test_logical_and.py
index 12a35b06d6..c897bde876 100644
--- a/dpctl/tests/elementwise/test_logical_and.py
+++ b/dpctl/tests/elementwise/test_logical_and.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_logical_not.py b/dpctl/tests/elementwise/test_logical_not.py
index aec9bf31b4..80ebaebb89 100644
--- a/dpctl/tests/elementwise/test_logical_not.py
+++ b/dpctl/tests/elementwise/test_logical_not.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_logical_or.py b/dpctl/tests/elementwise/test_logical_or.py
index f99f6758f5..9bc46cb042 100644
--- a/dpctl/tests/elementwise/test_logical_or.py
+++ b/dpctl/tests/elementwise/test_logical_or.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_logical_xor.py b/dpctl/tests/elementwise/test_logical_xor.py
index 9c34e8bbb5..aef9bc2f42 100644
--- a/dpctl/tests/elementwise/test_logical_xor.py
+++ b/dpctl/tests/elementwise/test_logical_xor.py
@@ -8,7 +8,7 @@
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless_equal required by applicable law or agreed to in writing, software
+#  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
diff --git a/dpctl/tests/elementwise/test_maximum_minimum.py b/dpctl/tests/elementwise/test_maximum_minimum.py
new file mode 100644
index 0000000000..e8b845d20d
--- /dev/null
+++ b/dpctl/tests/elementwise/test_maximum_minimum.py
@@ -0,0 +1,314 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import ctypes
+import itertools
+
+import numpy as np
+import pytest
+from numpy.testing import assert_array_equal
+
+import dpctl
+import dpctl.tensor as dpt
+from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported
+
+from .utils import _all_dtypes, _compare_dtypes, _usm_types
+
+
+@pytest.mark.parametrize("op1_dtype", _all_dtypes)
+@pytest.mark.parametrize("op2_dtype", _all_dtypes)
+def test_maximum_minimum_dtype_matrix(op1_dtype, op2_dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(op1_dtype, q)
+    skip_if_dtype_not_supported(op2_dtype, q)
+    # equal-length operands: shuffled 0..sz-1, cast to each operand dtype
+    sz = 127
+    ar1_np = np.arange(sz)
+    np.random.shuffle(ar1_np)
+    ar1 = dpt.asarray(ar1_np, dtype=op1_dtype)
+    ar2_np = np.arange(sz)
+    np.random.shuffle(ar2_np)
+    ar2 = dpt.asarray(ar2_np, dtype=op2_dtype)
+
+    r = dpt.maximum(ar1, ar2)
+    assert isinstance(r, dpt.usm_ndarray)
+    expected = np.maximum(ar1_np.astype(op1_dtype), ar2_np.astype(op2_dtype))
+
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == ar1.shape
+    assert (dpt.asnumpy(r) == expected).all()
+    assert r.sycl_queue == ar1.sycl_queue
+
+    r = dpt.minimum(ar1, ar2)
+    assert isinstance(r, dpt.usm_ndarray)
+    expected = np.minimum(ar1_np.astype(op1_dtype), ar2_np.astype(op2_dtype))
+
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == ar1.shape
+    assert (dpt.asnumpy(r) == expected).all()
+    assert r.sycl_queue == ar1.sycl_queue
+    # strided views: ar3 reversed, and every second element of ar4
+    ar3_np = np.arange(sz)
+    np.random.shuffle(ar3_np)
+    ar3 = dpt.asarray(ar3_np, dtype=op1_dtype)
+    ar4_np = np.arange(2 * sz)
+    np.random.shuffle(ar4_np)
+    ar4 = dpt.asarray(ar4_np, dtype=op2_dtype)
+
+    r = dpt.maximum(ar3[::-1], ar4[::2])
+    assert isinstance(r, dpt.usm_ndarray)
+    expected = np.maximum(
+        ar3_np[::-1].astype(op1_dtype), ar4_np[::2].astype(op2_dtype)
+    )
+
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == ar3.shape
+    assert (dpt.asnumpy(r) == expected).all()
+
+    r = dpt.minimum(ar3[::-1], ar4[::2])
+    assert isinstance(r, dpt.usm_ndarray)
+    expected = np.minimum(
+        ar3_np[::-1].astype(op1_dtype), ar4_np[::2].astype(op2_dtype)
+    )
+
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == ar3.shape
+    assert (dpt.asnumpy(r) == expected).all()
+
+
+@pytest.mark.parametrize("op_dtype", ["c8", "c16"])
+def test_maximum_minimum_complex_matrix(op_dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(op_dtype, q)
+
+    sz = 127
+    ar1_np_real = np.random.randint(0, 10, sz)
+    ar1_np_imag = np.random.randint(0, 10, sz)
+    ar1 = dpt.asarray(ar1_np_real + 1j * ar1_np_imag, dtype=op_dtype)
+
+    ar2_np_real = np.random.randint(0, 10, sz)
+    ar2_np_imag = np.random.randint(0, 10, sz)
+    ar2 = dpt.asarray(ar2_np_real + 1j * ar2_np_imag, dtype=op_dtype)
+
+    r = dpt.maximum(ar1, ar2)
+    expected = np.maximum(dpt.asnumpy(ar1), dpt.asnumpy(ar2))
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == expected.shape
+    assert_array_equal(dpt.asnumpy(r), expected)
+
+    r1 = dpt.maximum(ar1[::-2], ar2[::2])
+    expected1 = np.maximum(dpt.asnumpy(ar1[::-2]), dpt.asnumpy(ar2[::2]))
+    assert _compare_dtypes(r.dtype, expected1.dtype, sycl_queue=q)
+    assert r1.shape == expected1.shape
+    assert_array_equal(dpt.asnumpy(r1), expected1)
+
+    r = dpt.minimum(ar1, ar2)
+    expected = np.minimum(dpt.asnumpy(ar1), dpt.asnumpy(ar2))
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == expected.shape
+    assert_array_equal(dpt.asnumpy(r), expected)
+
+    r1 = dpt.minimum(ar1[::-2], ar2[::2])
+    expected1 = np.minimum(dpt.asnumpy(ar1[::-2]), dpt.asnumpy(ar2[::2]))
+    assert _compare_dtypes(r.dtype, expected1.dtype, sycl_queue=q)
+    assert r1.shape == expected1.shape
+    assert_array_equal(dpt.asnumpy(r1), expected1)
+
+
+@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"])
+def test_maximum_minimum_real_special_cases(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+    # every ordered pair drawn from nan, +/-inf and two finite values
+    x = [np.nan, np.inf, -np.inf, 5.0, -3.0]
+    x = list(itertools.product(x, repeat=2))
+    Xnp = np.array([tup[0] for tup in x], dtype=dtype)
+    Ynp = np.array([tup[1] for tup in x], dtype=dtype)
+    X = dpt.asarray(Xnp, dtype=dtype)
+    Y = dpt.asarray(Ynp, dtype=dtype)
+
+    R = dpt.maximum(X, Y)
+    Rnp = np.maximum(Xnp, Ynp)
+    assert_array_equal(dpt.asnumpy(R), Rnp)
+
+    R = dpt.minimum(X, Y)
+    Rnp = np.minimum(Xnp, Ynp)
+    assert_array_equal(dpt.asnumpy(R), Rnp)
+
+
+@pytest.mark.parametrize("dtype", ["c8", "c16"])
+def test_maximum_minimum_complex_special_cases(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+    # NOTE(review): -np.inf appears twice; was +np.inf intended? -- confirm
+    x = [np.nan, -np.inf, -np.inf, +2.0, -1.0]
+    x = [complex(*val) for val in itertools.product(x, repeat=2)]
+    x = list(itertools.product(x, repeat=2))
+    # x now holds ordered pairs of complex values; split them into operands
+    Xnp = np.array([tup[0] for tup in x], dtype=dtype)
+    Ynp = np.array([tup[1] for tup in x], dtype=dtype)
+    X = dpt.asarray(Xnp, dtype=dtype, sycl_queue=q)
+    Y = dpt.asarray(Ynp, dtype=dtype, sycl_queue=q)
+
+    R = dpt.maximum(X, Y)
+    Rnp = np.maximum(Xnp, Ynp)
+    assert_array_equal(dpt.asnumpy(dpt.real(R)), np.real(Rnp))
+    assert_array_equal(dpt.asnumpy(dpt.imag(R)), np.imag(Rnp))
+
+    R = dpt.minimum(X, Y)
+    Rnp = np.minimum(Xnp, Ynp)
+    assert_array_equal(dpt.asnumpy(dpt.real(R)), np.real(Rnp))
+    assert_array_equal(dpt.asnumpy(dpt.imag(R)), np.imag(Rnp))
+
+
+@pytest.mark.parametrize("op1_usm_type", _usm_types)
+@pytest.mark.parametrize("op2_usm_type", _usm_types)
+def test_maximum_minimum_usm_type_matrix(op1_usm_type, op2_usm_type):
+    get_queue_or_skip()
+    # both operands are "i4"; only the USM allocation type is varied
+    sz = 128
+    ar1_np = np.arange(sz, dtype="i4")
+    np.random.shuffle(ar1_np)
+    ar1 = dpt.asarray(ar1_np, usm_type=op1_usm_type)
+    ar2_np = np.arange(sz, dtype="i4")
+    np.random.shuffle(ar2_np)
+    ar2 = dpt.asarray(ar2_np, usm_type=op2_usm_type)
+
+    r = dpt.maximum(ar1, ar2)
+    assert isinstance(r, dpt.usm_ndarray)
+    expected_usm_type = dpctl.utils.get_coerced_usm_type(
+        (op1_usm_type, op2_usm_type)
+    )
+    assert r.usm_type == expected_usm_type
+
+    r = dpt.minimum(ar1, ar2)
+    assert isinstance(r, dpt.usm_ndarray)
+    expected_usm_type = dpctl.utils.get_coerced_usm_type(
+        (op1_usm_type, op2_usm_type)
+    )
+    assert r.usm_type == expected_usm_type
+
+
+def test_maximum_minimum_order():
+    get_queue_or_skip()
+    # NOTE(review): only dpt.maximum is exercised below, never dpt.minimum
+    ar1_np = np.arange(20 * 20, dtype="i4").reshape(20, 20)
+    np.random.shuffle(ar1_np)
+    ar1 = dpt.asarray(ar1_np, order="C")
+    ar2_np = np.arange(20 * 20, dtype="i4").reshape(20, 20)
+    np.random.shuffle(ar2_np)
+    ar2 = dpt.asarray(ar2_np, order="C")
+
+    r1 = dpt.maximum(ar1, ar2, order="C")
+    assert r1.flags.c_contiguous
+    r2 = dpt.maximum(ar1, ar2, order="F")
+    assert r2.flags.f_contiguous
+    r3 = dpt.maximum(ar1, ar2, order="A")
+    assert r3.flags.c_contiguous
+    r4 = dpt.maximum(ar1, ar2, order="K")
+    assert r4.flags.c_contiguous
+    # same checks with F-ordered inputs: "A" and "K" now yield F layout
+    ar1 = dpt.asarray(ar1_np, order="F")
+    ar2 = dpt.asarray(ar2_np, order="F")
+    r1 = dpt.maximum(ar1, ar2, order="C")
+    assert r1.flags.c_contiguous
+    r2 = dpt.maximum(ar1, ar2, order="F")
+    assert r2.flags.f_contiguous
+    r3 = dpt.maximum(ar1, ar2, order="A")
+    assert r3.flags.f_contiguous
+    r4 = dpt.maximum(ar1, ar2, order="K")
+    assert r4.flags.f_contiguous
+    # strided views: 40x40 sliced to 20x20 with the second axis reversed
+    ar1_np = np.arange(40 * 40, dtype="i4").reshape(40, 40)
+    np.random.shuffle(ar1_np)
+    ar1 = dpt.asarray(ar1_np, order="C")[:20, ::-2]
+    ar2_np = np.arange(40 * 40, dtype="i4").reshape(40, 40)
+    np.random.shuffle(ar2_np)
+    ar2 = dpt.asarray(ar2_np, order="C")[:20, ::-2]
+    r4 = dpt.maximum(ar1, ar2, order="K")
+    assert r4.strides == (20, -1)
+
+    ar1 = dpt.asarray(ar1_np, order="C")[:20, ::-2].mT
+    ar2 = dpt.asarray(ar2_np, order="C")[:20, ::-2].mT
+    r4 = dpt.maximum(ar1, ar2, order="K")
+    assert r4.strides == (-1, 20)
+
+
+@pytest.mark.parametrize("arr_dt", _all_dtypes)
+def test_maximum_minimum_python_scalar(arr_dt):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(arr_dt, q)
+    # scalars equal to one, given as Python, NumPy and ctypes objects
+    X = dpt.zeros((10, 10), dtype=arr_dt, sycl_queue=q)
+    py_ones = (
+        bool(1),
+        int(1),
+        float(1),
+        complex(1),
+        np.float32(1),
+        ctypes.c_int(1),
+    )
+    for sc in py_ones:
+        R = dpt.maximum(X, sc)
+        assert isinstance(R, dpt.usm_ndarray)
+        R = dpt.maximum(sc, X)
+        assert isinstance(R, dpt.usm_ndarray)
+
+        R = dpt.minimum(X, sc)
+        assert isinstance(R, dpt.usm_ndarray)
+        R = dpt.minimum(sc, X)
+        assert isinstance(R, dpt.usm_ndarray)
+
+
+class MockArray:
+    def __init__(self, arr):
+        self.data_ = arr  # object whose USM array interface is forwarded
+
+    @property
+    def __sycl_usm_array_interface__(self):
+        return self.data_.__sycl_usm_array_interface__
+
+
+def test_maximum_minimum_mock_array():
+    get_queue_or_skip()
+    a = dpt.arange(10)
+    b = dpt.ones(10)
+    c = MockArray(b)  # not a usm_ndarray, only exposes the USM interface
+    r = dpt.maximum(a, c)
+    assert isinstance(r, dpt.usm_ndarray)
+
+    r = dpt.minimum(a, c)
+    assert isinstance(r, dpt.usm_ndarray)
+
+
+def test_maximum_canary_mock_array():
+    get_queue_or_skip()
+    a = dpt.arange(10)
+
+    class Canary:
+        def __init__(self):
+            pass
+
+        @property
+        def __sycl_usm_array_interface__(self):
+            return None  # None instead of an interface dictionary
+    # both functions are expected to raise ValueError for this operand
+    c = Canary()
+    with pytest.raises(ValueError):
+        dpt.maximum(a, c)
+
+    with pytest.raises(ValueError):
+        dpt.minimum(a, c)
diff --git a/dpctl/tests/elementwise/test_round.py b/dpctl/tests/elementwise/test_round.py
index fb2b104bb1..6ca4feaf22 100644
--- a/dpctl/tests/elementwise/test_round.py
+++ b/dpctl/tests/elementwise/test_round.py
@@ -1,3 +1,19 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
 import itertools
 
 import numpy as np