From 195c4d86425a1332df302648df7460c67037260c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 27 Jan 2023 08:08:57 -0600 Subject: [PATCH] Add support of logical comparison operations --- .../include/dpnp_gen_1arg_1type_tbl.hpp | 69 ++++++- .../include/dpnp_gen_2arg_2type_tbl.hpp | 5 +- dpnp/backend/include/dpnp_iface_fptr.hpp | 4 + dpnp/backend/kernels/dpnp_krnl_logic.cpp | 183 ++++++++++++++++ dpnp/dpnp_algo/dpnp_algo.pxd | 4 + dpnp/dpnp_algo/dpnp_algo_logic.pyx | 76 +++---- dpnp/dpnp_iface_logic.py | 195 +++++++++++++----- tests/helper.py | 31 +++ tests/skipped_tests_gpu.tbl | 2 +- tests/test_logic.py | 70 +++++-- .../cupy/logic_tests/test_comparison.py | 1 - .../third_party/cupy/logic_tests/test_ops.py | 4 - 12 files changed, 510 insertions(+), 134 deletions(-) create mode 100644 tests/helper.py diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp index f5ee23d755f2..0f6cb5b31deb 100644 --- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -23,6 +23,8 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** +#if defined(MACRO_1ARG_1TYPE_OP) + /* * This header file contains single argument element wise functions definitions * @@ -35,10 +37,6 @@ * */ -#ifndef MACRO_1ARG_1TYPE_OP -#error "MACRO_1ARG_1TYPE_OP is not defined" -#endif - #ifdef _SECTION_DOCUMENTATION_GENERATION_ #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__) \ @@ -88,7 +86,7 @@ const shape_elem_type* input1_strides, \ const size_t* where); -#endif +#endif // _SECTION_DOCUMENTATION_GENERATION_ MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), q.submit(kernel_func)) MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, q.submit(kernel_func)) @@ -107,3 +105,62 @@ MACRO_1ARG_1TYPE_OP(dpnp_square_c, oneapi::mkl::vm::sqr(q, input1_size, input1_data, result)) #undef MACRO_1ARG_1TYPE_OP + +#elif defined(MACRO_1ARG_1TYPE_LOGIC_OP) + +/* + * This header file contains single argument element wise functions definitions + * + * Macro `MACRO_1ARG_1TYPE_LOGIC_OP` must be defined before usage + * + * Parameters: + * - public name of the function and kernel name + * - operation used to calculate the result + * + */ + +#ifdef _SECTION_DOCUMENTATION_GENERATION_ + +#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__) \ + /** @ingroup BACKEND_API */ \ + /** @brief Per element operation function __name__ */ \ + /** */ \ + /** Function "__name__" executes operator "__operation__" over corresponding elements of input array */ \ + /** */ \ + /** @param[in] q_ref Reference to SYCL queue. */ \ + /** @param[out] result_out Output array. */ \ + /** @param[in] result_size Output array size. */ \ + /** @param[in] result_ndim Number of output array dimensions. */ \ + /** @param[in] result_shape Output array shape. */ \ + /** @param[in] result_strides Output array strides. */ \ + /** @param[in] input1_in Input array 1. */ \ + /** @param[in] input1_size Input array 1 size. */ \ + /** @param[in] input1_ndim Number of input array 1 dimensions. */ \ + /** @param[in] input1_shape Input array 1 shape. */ \ + /** @param[in] input1_strides Input array 1 strides. 
*/ \ + /** @param[in] where Where condition. */ \ + /** @param[in] dep_event_vec_ref Reference to vector of SYCL events. */ \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref); + +#endif // _SECTION_DOCUMENTATION_GENERATION_ + +MACRO_1ARG_1TYPE_LOGIC_OP(dpnp_logical_not_c, !input1_elem) + +#undef MACRO_1ARG_1TYPE_LOGIC_OP + +#else +#error "MACRO_1ARG_1TYPE_OP or MACRO_1ARG_1TYPE_LOGIC_OP is not defined" +#endif // MACRO_1ARG_1TYPE_OP || MACRO_1ARG_1TYPE_LOGIC_OP diff --git a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp index 2fb4fe9d6fde..4b6c4290ef31 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp @@ -64,7 +64,7 @@ /** @param[in] input2_strides Input array 2 strides. */ \ /** @param[in] where Where condition. */ \ /** @param[in] dep_event_vec_ref Reference to vector of SYCL events. */ \ - template \ + template \ DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ void* result_out, \ const size_t result_size, \ @@ -91,6 +91,9 @@ MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_c, input1_elem > input2_elem) MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_equal_c, input1_elem >= input2_elem) MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_c, input1_elem < input2_elem) MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_equal_c, input1_elem <= input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_and_c, input1_elem && input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_or_c, input1_elem || input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_xor_c, (!!input1_elem) != (!!input2_elem)) MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_not_equal_c, input1_elem != input2_elem) #undef MACRO_2ARG_2TYPES_LOGIC_OP diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 7a3564fa1d35..898da9362b55 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -206,6 +206,10 @@ enum class DPNPFuncName : size_t DPNP_FN_LOG2_EXT, /**< Used in numpy.log2() impl, requires extra parameters */ DPNP_FN_LOG1P, /**< Used in numpy.log1p() impl */ DPNP_FN_LOG1P_EXT, /**< Used in numpy.log1p() impl, requires extra parameters */ + DPNP_FN_LOGICAL_AND_EXT, /**< Used in numpy.logical_and() impl, requires extra parameters */ + DPNP_FN_LOGICAL_NOT_EXT, /**< Used in numpy.logical_not() impl, requires extra parameters */ + DPNP_FN_LOGICAL_OR_EXT, /**< Used in numpy.logical_or() impl, requires extra parameters */ + DPNP_FN_LOGICAL_XOR_EXT, /**< Used in numpy.logical_xor() impl, requires extra parameters */ DPNP_FN_MATMUL, /**< Used in numpy.matmul() impl */ DPNP_FN_MATMUL_EXT, /**< Used in numpy.matmul() impl, requires extra parameters */ DPNP_FN_MATRIX_RANK, /**< Used in numpy.linalg.matrix_rank() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index 19a6dd3646e3..be1bb1bab79b 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -288,6 +288,182 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, const DPCTLEventVectorRef) = 
dpnp_any_c<_DataType, _ResultType>; +#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__) \ + template \ + class __name__##_kernel; \ + \ + template \ + class __name__##_broadcast_kernel; \ + \ + template \ + class __name__##_strides_kernel; \ + \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref) \ + { \ + /* avoid warning unused variable*/ \ + (result_shape); \ + (void)where; \ + (void)dep_event_vec_ref; \ + \ + DPCTLSyclEventRef event_ref = nullptr; \ + \ + if (!input1_size) \ + { \ + return event_ref; \ + } \ + \ + sycl::queue q = *(reinterpret_cast(q_ref)); \ + \ + _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast(input1_in)); \ + bool* result = static_cast(result_out); \ + \ + shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \ + \ + get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \ + bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); \ + delete[] input1_shape_offsets; \ + \ + if (use_strides) \ + { \ + if (result_ndim != input1_ndim) \ + { \ + throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) + \ + " mismatches with input1 ndim=" + std::to_string(input1_ndim)); \ + } \ + \ + /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */ \ + using usm_host_allocatorT = sycl::usm_allocator; \ + \ + size_t strides_size = 2 * result_ndim; \ + shape_elem_type *dev_strides_data = sycl::malloc_device(strides_size, q); \ + \ + /* create host temporary for packed strides managed by shared pointer */ \ + auto strides_host_packed = std::vector(strides_size, \ + usm_host_allocatorT(q)); \ + \ + /* packed vector is concatenation of result_strides and input1_strides */ \ + std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ + std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ + \ + auto copy_strides_ev = q.copy(strides_host_packed.data(), \ + dev_strides_data, \ + strides_host_packed.size()); \ + \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + { \ + const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ + \ + size_t input1_id = 0; \ + \ + for (size_t i = 0; i < result_ndim; ++i) \ + { \ + const size_t output_xyz_id = \ + get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i); \ + input1_id += output_xyz_id * input1_strides_data[i]; \ + } \ + \ + const _DataType_input1 input1_elem = input1_data[input1_id]; \ + result[output_id] = __operation__; \ + } \ + }; \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.depends_on(copy_strides_ev); \ + cgh.parallel_for>( \ + sycl::range<1>(result_size), kernel_parallel_for_func); \ + }; \ + \ + q.submit(kernel_func).wait(); \ + \ + sycl::free(dev_strides_data, q); \ + return event_ref; \ + } \ + else \ + { \ + constexpr size_t lws = 64; \ + constexpr unsigned int 
vec_sz = 8; \ + constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space; \ + \ + auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \ + auto lws_range = sycl::range<1>(lws); \ + \ + auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ + auto sg = nd_it.get_sub_group(); \ + size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ + size_t end = start + static_cast(vec_sz); \ + \ + if (end < result_size) { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ + sycl::vec res_vec; \ + \ + for (size_t k = 0; k < vec_sz; ++k) { \ + const _DataType_input1 input1_elem = x1[k]; \ + res_vec[k] = __operation__; \ + } \ + sg.store(sycl::multi_ptr(&result[start]), res_vec); \ + \ + } \ + else { \ + for (size_t k = start; k < result_size; ++k) { \ + const _DataType_input1 input1_elem = input1_data[k]; \ + result[k] = __operation__; \ + } \ + } \ + }; \ + \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for>( \ + sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ + }; \ + sycl::event event = q.submit(kernel_func); \ + \ + event_ref = reinterpret_cast(&event); \ + return DPCTLEvent_Copy(event_ref); \ + } \ + return event_ref; \ + } \ + \ + template \ + DPCTLSyclEventRef (*__name__##_ext)(DPCTLSyclQueueRef, \ + void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const size_t*, \ + const DPCTLEventVectorRef) = __name__<_DataType_input1>; + +#include + +template +static void func_map_logic_1arg_1type_helper(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_NOT_EXT][FTs][FTs] = + {eft_BLN, (void*)dpnp_logical_not_c_ext>}), ...); +} + + #define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__) \ template \ @@ -546,6 +722,12 @@ static void func_map_logic_2arg_2type_core(func_map_t& fmap) {eft_BLN, (void*)dpnp_less_c_ext, func_type_map_t::find_type>}), ...); ((fmap[DPNPFuncName::DPNP_FN_LESS_EQUAL_EXT][FT1][FTs] = {eft_BLN, (void*)dpnp_less_equal_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_AND_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_logical_and_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_OR_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_logical_or_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_XOR_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_logical_xor_c_ext, func_type_map_t::find_type>}), ...); ((fmap[DPNPFuncName::DPNP_FN_NOT_EQUAL_EXT][FT1][FTs] = {eft_BLN, (void*)dpnp_not_equal_c_ext, func_type_map_t::find_type>}), ...); } @@ -648,6 +830,7 @@ void func_map_init_logic(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_any_ext_c}; fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_any_ext_c}; + func_map_logic_1arg_1type_helper(fmap); func_map_logic_2arg_2type_helper(fmap); return; diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 50387e1565a0..485e8adb1a66 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -182,6 +182,10 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_LOG1P_EXT DPNP_FN_LOG2 
DPNP_FN_LOG2_EXT + DPNP_FN_LOGICAL_AND_EXT + DPNP_FN_LOGICAL_NOT_EXT + DPNP_FN_LOGICAL_OR_EXT + DPNP_FN_LOGICAL_XOR_EXT DPNP_FN_MATMUL DPNP_FN_MATMUL_EXT DPNP_FN_MATRIX_RANK diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pyx index ae0f711eb109..b6ac36db412b 100644 --- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_logic.pyx @@ -270,65 +270,35 @@ cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor x1_obj, return call_fptr_2in_1out_strides(DPNP_FN_LESS_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less_equal") +cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_AND_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_and") -cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_and(input1.get_pyobj()[i], input2.get_pyobj()[i]) - - return result - - -cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor input1): - input1_obj = input1.get_array() - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=input1_obj.sycl_device, - usm_type=input1_obj.usm_type, - sycl_queue=input1_obj.sycl_queue) - - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_not(input1.get_pyobj()[i]) - - return result - - -cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_or(input1.get_pyobj()[i], input2.get_pyobj()[i]) - - return result +cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor x_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_1in_1out_strides(DPNP_FN_LOGICAL_NOT_EXT, x_obj, dtype, out, where, func_name="logical_not") -cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) +cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_OR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_or") - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_xor(input1.get_pyobj()[i], input2.get_pyobj()[i]) - 
return result +cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_XOR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_xor") cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor x1_obj, diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 153bac1b24fa..de7537a42878 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -737,19 +737,32 @@ def less_equal(x1, return call_origin(numpy.less_equal, x1, x2) -def logical_and(x1, x2, out=None, **kwargs): +def logical_and(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ Compute the truth value of x1 AND x2 element-wise. For full documentation refer to :obj:`numpy.logical_and`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise logical comparison of `x1` and `x2`. + Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -769,30 +782,53 @@ def logical_and(x1, x2, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_and(x1_desc, x2_desc).get_pyobj() + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - return call_origin(numpy.logical_and, x1, x2, out, **kwargs) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_logical_and(x1_desc, x2_desc).get_pyobj() + return call_origin(numpy.logical_and, x1, x2) -def logical_not(x1, out=None, **kwargs): +def logical_not(x, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ Compute the truth value of NOT x element-wise. For full documentation refer to :obj:`numpy.logical_not`. + Returns + ------- + out : dpnp.ndarray + Boolean result with the same shape as `x` of the NOT operation + on elements of `x`. + Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. + Parameters `x` is only supported as :class:`dpnp.ndarray`. + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. 
Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data type is limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -810,29 +846,47 @@ def logical_not(x1, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # if x1_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_not(x1_desc).get_pyobj() - - return call_origin(numpy.logical_not, x1, out, **kwargs) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + else: + x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False) + if x1_desc: + return dpnp_logical_not(x1_desc).get_pyobj() + return call_origin(numpy.logical_not, x) -def logical_or(x1, x2, out=None, **kwargs): +def logical_or(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ Compute the truth value of x1 OR x2 element-wise. For full documentation refer to :obj:`numpy.logical_or`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise logical comparison of `x1` and `x2`. + Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -852,30 +906,54 @@ def logical_or(x1, x2, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_or(x1_desc, x2_desc).get_pyobj() + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - return call_origin(numpy.logical_or, x1, x2, out, **kwargs) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_logical_or(x1_desc, x2_desc).get_pyobj() + return call_origin(numpy.logical_or, x1, x2) -def logical_xor(x1, x2, out=None, **kwargs): +def logical_xor(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Compute the truth value of x1 XOR x2, element-wise. 
+ Compute the truth value of x1 XOR x2 element-wise. For full documentation refer to :obj:`numpy.logical_xor`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise logical comparison of `x1` and `x2`. + Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -895,15 +973,26 @@ def logical_xor(x1, x2, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj() + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - return call_origin(numpy.logical_xor, x1, x2, out, **kwargs) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj() + return call_origin(numpy.logical_xor, x1, x2) def not_equal(x1, diff --git a/tests/helper.py b/tests/helper.py new file mode 100644 index 000000000000..e16bc7405381 --- /dev/null +++ b/tests/helper.py @@ -0,0 +1,31 @@ +import dpctl +import dpnp + + +def get_all_dtypes(no_bool=False, no_float16=True, no_complex=False, device=None): + """ + Build a list of types supported by DPNP based on input flags and device capabilities. 
+ """ + + dev = dpctl.SyclQueue().sycl_device if device is None else device + + # add boolean type + dtypes = [dpnp.bool] if not no_bool else [] + + # add integer types + dtypes.extend([dpnp.int32, dpnp.int64]) + + # add floating types + if not no_float16 and dev.has_aspect_fp16: + dtypes.append(dpnp.float16) + + dtypes.append(dpnp.float32) + if dev.has_aspect_fp64: + dtypes.append(dpnp.float64) + + # add complex types + if not no_complex: + dtypes.append(dpnp.complex64) + if dev.has_aspect_fp64: + dtypes.append(dpnp.complex128) + return dtypes diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 5426e386bbca..c64c7fa45f99 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -858,7 +858,7 @@ tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_arra tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_length tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_is_equal tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_not_equal -tests/third_party/cupy/logic_tests/test_comparison.py::TestComparisonOperator::test_binary_npscalar_array + tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast_arrays tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_10_{shapes=[(0, 1, 1, 0, 3), (5, 2, 0, 1, 0, 0, 3), (2, 1, 0, 0, 0, 3)]}::test_broadcast diff --git a/tests/test_logic.py b/tests/test_logic.py index 062300bb8d3a..425106fd2efe 100644 --- a/tests/test_logic.py +++ b/tests/test_logic.py @@ -1,4 +1,5 @@ import pytest +from .helper import get_all_dtypes import dpnp @@ -9,9 +10,7 @@ ) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_], - ids=['float64', 'float32', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True)) @pytest.mark.parametrize("shape", [(0,), (4,), (2, 3), (2, 2, 2)], ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)']) @@ -42,9 +41,7 @@ def test_all(type, shape): assert_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_allclose(type): a = numpy.random.rand(10) @@ -66,9 +63,7 @@ def test_allclose(type): assert_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_], - ids=['float64', 'float32', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True)) @pytest.mark.parametrize("shape", [(0,), (4,), (2, 3), (2, 2, 2)], ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)']) @@ -153,17 +148,60 @@ def test_not_equal(): assert_equal(dpnp_res, np_res) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) @pytest.mark.parametrize("op", - ['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal'], - ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal']) + ['logical_and', 'logical_or', 'logical_xor'], + ids=['logical_and', 'logical_or', 'logical_xor']) +def test_logic_comparison(op, dtype): + a = numpy.array([0, 0, 3, 2], dtype=dtype) + b = numpy.array([0, 4, 0, 2], dtype=dtype) + + # x1 OP 
x2 + np_res = getattr(numpy, op)(a, b) + dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b)) + assert_equal(dpnp_res, np_res) + + # x2 OP x1 + np_res = getattr(numpy, op)(b, a) + dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a)) + assert_equal(dpnp_res, np_res) + + # numpy.tile(x1, (10,)) OP numpy.tile(x2, (10,)) + a, b = numpy.tile(a, (10,)), numpy.tile(b, (10,)) + np_res = getattr(numpy, op)(a, b) + dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b)) + assert_equal(dpnp_res, np_res) + + # numpy.tile(x2, (10, 2)) OP numpy.tile(x1, (10, 2)) + a, b = numpy.tile(a, (10, 1)), numpy.tile(b, (10, 1)) + np_res = getattr(numpy, op)(b, a) + dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a)) + assert_equal(dpnp_res, np_res) + + +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) +def test_logical_not(dtype): + a = dpnp.array([0, 4, 0, 2], dtype=dtype) + + np_res = numpy.logical_not(a.asnumpy()) + dpnp_res = dpnp.logical_not(a) + assert_equal(dpnp_res, np_res) + + +@pytest.mark.parametrize("op", + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal']) @pytest.mark.parametrize("x1", [[3, 4, 5, 6], [[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]], ids=['[3, 4, 5, 6]', '[[1, 2, 3, 4], [5, 6, 7, 8]]', '[[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]']) @pytest.mark.parametrize("x2", [5, [1, 2, 5, 6]], ids=['5', '[1, 2, 5, 6]']) -def test_elemwise_comparison(op, x1, x2): - create_func = lambda xp, a: xp.asarray(a) if not numpy.isscalar(a) else a +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) +def test_elemwise_comparison(op, x1, x2, dtype): + create_func = lambda xp, a: xp.asarray(a, dtype=dtype) if not numpy.isscalar(a) else numpy.dtype(dtype=dtype).type(a) np_x1, np_x2 = create_func(numpy, x1), create_func(numpy, x2) dp_x1, dp_x2 = create_func(dpnp, np_x1), create_func(dpnp, np_x2) @@ -185,8 +223,10 @@ def test_elemwise_comparison(op, x1, x2): @pytest.mark.parametrize("op", - ['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal'], - ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal']) + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal']) @pytest.mark.parametrize("sh1", [[10], [8, 4], [4, 1, 2]], ids=['(10,)', '(8, 4)', '(4, 1, 2)']) diff --git a/tests/third_party/cupy/logic_tests/test_comparison.py b/tests/third_party/cupy/logic_tests/test_comparison.py index 461f00319bc7..67848359188d 100644 --- a/tests/third_party/cupy/logic_tests/test_comparison.py +++ b/tests/third_party/cupy/logic_tests/test_comparison.py @@ -37,7 +37,6 @@ def test_equal(self): self.check_binary('equal') -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestComparisonOperator(unittest.TestCase): diff --git a/tests/third_party/cupy/logic_tests/test_ops.py b/tests/third_party/cupy/logic_tests/test_ops.py index 55b8617882b1..cdbd035cd265 100644 --- a/tests/third_party/cupy/logic_tests/test_ops.py +++ b/tests/third_party/cupy/logic_tests/test_ops.py @@ -20,18 +20,14 @@ def check_binary(self, name, xp, dtype): b = testing.shaped_reverse_arange((2, 3), xp, dtype) return getattr(xp, name)(a, b) - 
@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_and(self): self.check_binary('logical_and') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_or(self): self.check_binary('logical_or') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_xor(self): self.check_binary('logical_xor') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_not(self): self.check_unary('logical_not')