use_dpctl_remainder_func

vtavana · vtavana · commit 3dedb2ff1fb5 · 2023-08-08T09:11:27.000-05:00
diff --git a/dpnp/backend/extensions/vm/remainder.hpp b/dpnp/backend/extensions/vm/remainder.hpp
@@ -0,0 +1,83 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <CL/sycl.hpp>
+
+#include "common.hpp"
+#include "types_matrix.hpp"
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace vm
+{
+template <typename T> // T: element type of both inputs and of the output
+sycl::event remainder_contig_impl(sycl::queue exec_q,
+                                  const std::int64_t n,
+                                  const char *in_a,
+                                  const char *in_b,
+                                  char *out_y,
+                                  const std::vector<sycl::event> &depends)
+{
+    type_utils::validate_type_for_device<T>(exec_q);
+
+    const T *a = reinterpret_cast<const T *>(in_a); // typed view of `in_a`
+    const T *b = reinterpret_cast<const T *>(in_b); // typed view of `in_b`
+    T *y = reinterpret_cast<T *>(out_y);            // typed view of `out_y`
+
+    return mkl_vm::remainder(
+        exec_q,
+        n, // number of elements to process
+        a, // 1st input vector: n elements of type T
+        b, // 2nd input vector: n elements of type T
+        y, // output vector: n elements of type T
+        depends); // forwarded unchanged; the call's event goes to the caller
+}
+
+template <typename fnT, typename T>
+struct RemainderContigFactory
+{
+    fnT get() // implementation to use for T, or nullptr when there is none
+    {
+        if constexpr (std::is_same_v<
+                          typename types::RemainderOutputType<T>::value_type,
+                          void>)
+        {
+            return nullptr; // RemainderOutputType<T> resolved to void
+        }
+        else {
+            return remainder_contig_impl<T>; // contiguous implementation
+        }
+    }
+};
+} // namespace vm
+} // namespace ext
+} // namespace backend
+} // namespace dpnp
diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp
@@ -68,6 +68,21 @@ struct DivOutputType
         dpctl_td_ns::DefaultResultEntry<void>>::result_type;
 };
 
+/**
+ * @brief Resolves the result type of oneapi::mkl::vm::remainder<T>: `double`
+ * or `float` when T is that same type, and `void` for any other type T.
+ *
+ * @tparam T Type of input vectors `a` and `b` and of result vector `y`.
+ */
+template <typename T>
+struct RemainderOutputType
+{
+    using value_type = typename std::disjunction<
+        dpctl_td_ns::BinaryTypeMapResultEntry<T, double, T, double, double>,
+        dpctl_td_ns::BinaryTypeMapResultEntry<T, float, T, float, float>,
+        dpctl_td_ns::DefaultResultEntry<void>>::result_type;
+};
+
 /**
  * @brief A factory to define pairs of supported types for which
  * MKL VM library provides support in oneapi::mkl::vm::cos<T> function.
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp
@@ -34,6 +34,7 @@
 #include "cos.hpp"
 #include "div.hpp"
 #include "ln.hpp"
+#include "remainder.hpp"
 #include "sin.hpp"
 #include "sqr.hpp"
 #include "sqrt.hpp"
@@ -46,6 +47,7 @@ using vm_ext::binary_impl_fn_ptr_t;
 using vm_ext::unary_impl_fn_ptr_t;
 
 static binary_impl_fn_ptr_t div_dispatch_vector[dpctl_td_ns::num_types];
+static binary_impl_fn_ptr_t remainder_dispatch_vector[dpctl_td_ns::num_types];
 
 static unary_impl_fn_ptr_t cos_dispatch_vector[dpctl_td_ns::num_types];
 static unary_impl_fn_ptr_t ln_dispatch_vector[dpctl_td_ns::num_types];
@@ -88,6 +90,37 @@ PYBIND11_MODULE(_vm_impl, m)
               py::arg("dst"));
     }
 
+    // BinaryUfunc: ==== REMAINDER(x1, x2) ====
+    {
+        vm_ext::init_ufunc_dispatch_vector<binary_impl_fn_ptr_t,
+                                           vm_ext::RemainderContigFactory>(
+            remainder_dispatch_vector);
+
+        auto remainder_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2,
+                                   arrayT dst, const event_vecT &depends = {}) {
+            return vm_ext::binary_ufunc(exec_q, src1, src2, dst, depends,
+                                        remainder_dispatch_vector);
+        };
+        m.def("_remainder", remainder_pyapi,
+              "Call `remainder` function from OneMKL VM library to performs "
+              "element "
+              "by element remainder of vector `src1` by vector `src2` "
+              "to resulting vector `dst`",
+              py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"),
+              py::arg("dst"), py::arg("depends") = py::list());
+
+        auto remainder_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1,
+                                                arrayT src2, arrayT dst) {
+            return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst,
+                                                     remainder_dispatch_vector);
+        };
+        m.def("_mkl_remainder_to_call", remainder_need_to_call_pyapi,
+              "Check input arguments to answer if `remainder` function from "
+              "OneMKL VM library can be used",
+              py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"),
+              py::arg("dst"));
+    }
+
     // UnaryUfunc: ==== Cos(x) ====
     {
         vm_ext::init_ufunc_dispatch_vector<unary_impl_fn_ptr_t,
diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -331,8 +331,6 @@ enum class DPNPFuncName : size_t
     DPNP_FN_RADIANS_EXT,     /**< Used in numpy.radians() impl, requires extra
                                 parameters */
     DPNP_FN_REMAINDER,       /**< Used in numpy.remainder() impl  */
-    DPNP_FN_REMAINDER_EXT,   /**< Used in numpy.remainder() impl, requires extra
-                                parameters */
     DPNP_FN_RECIP,           /**< Used in numpy.recip() impl  */
     DPNP_FN_RECIP_EXT,       /**< Used in numpy.recip() impl, requires extra
                                 parameters */
diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
@@ -988,23 +988,6 @@ void (*dpnp_remainder_default_c)(void *,
                                  const size_t *) =
     dpnp_remainder_c<_DataType_output, _DataType_input1, _DataType_input2>;
 
-template <typename _DataType_output,
-          typename _DataType_input1,
-          typename _DataType_input2>
-DPCTLSyclEventRef (*dpnp_remainder_ext_c)(DPCTLSyclQueueRef,
-                                          void *,
-                                          const void *,
-                                          const size_t,
-                                          const shape_elem_type *,
-                                          const size_t,
-                                          const void *,
-                                          const size_t,
-                                          const shape_elem_type *,
-                                          const size_t,
-                                          const size_t *,
-                                          const DPCTLEventVectorRef) =
-    dpnp_remainder_c<_DataType_output, _DataType_input1, _DataType_input2>;
-
 template <typename _KernelNameSpecialization1,
           typename _KernelNameSpecialization2,
           typename _KernelNameSpecialization3>
@@ -1385,39 +1368,6 @@ void func_map_init_mathematical(func_map_t &fmap)
     fmap[DPNPFuncName::DPNP_FN_REMAINDER][eft_DBL][eft_DBL] = {
         eft_DBL, (void *)dpnp_remainder_default_c<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_INT][eft_INT] = {
-        eft_INT, (void *)dpnp_remainder_ext_c<int32_t, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_INT][eft_LNG] = {
-        eft_LNG, (void *)dpnp_remainder_ext_c<int64_t, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_INT][eft_FLT] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_INT][eft_DBL] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_LNG][eft_INT] = {
-        eft_LNG, (void *)dpnp_remainder_ext_c<int64_t, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_LNG][eft_LNG] = {
-        eft_LNG, (void *)dpnp_remainder_ext_c<int64_t, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_LNG][eft_FLT] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_LNG][eft_DBL] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_FLT][eft_INT] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_FLT][eft_LNG] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_FLT][eft_FLT] = {
-        eft_FLT, (void *)dpnp_remainder_ext_c<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_FLT][eft_DBL] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_DBL][eft_INT] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_DBL][eft_LNG] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_DBL][eft_FLT] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_REMAINDER_EXT][eft_DBL][eft_DBL] = {
-        eft_DBL, (void *)dpnp_remainder_ext_c<double, double, double>};
-
     fmap[DPNPFuncName::DPNP_FN_TRAPZ][eft_INT][eft_INT] = {
         eft_DBL, (void *)dpnp_trapz_default_c<int32_t, int32_t, double>};
     fmap[DPNPFuncName::DPNP_FN_TRAPZ][eft_INT][eft_LNG] = {
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -202,8 +202,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_QR_EXT
         DPNP_FN_RADIANS
         DPNP_FN_RADIANS_EXT
-        DPNP_FN_REMAINDER
-        DPNP_FN_REMAINDER_EXT
         DPNP_FN_RECIP
         DPNP_FN_RECIP_EXT
         DPNP_FN_REPEAT
@@ -490,9 +488,6 @@ cpdef dpnp_descriptor dpnp_minimum(dpnp_descriptor x1_obj, dpnp_descriptor x2_ob
 cpdef dpnp_descriptor dpnp_negative(dpnp_descriptor array1)
 cpdef dpnp_descriptor dpnp_power(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*,
                                  dpnp_descriptor out=*, object where=*)
-cpdef dpnp_descriptor dpnp_remainder(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*,
-                                     dpnp_descriptor out=*, object where=*)
-
 
 """
 Array manipulation routines
diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
@@ -62,7 +62,6 @@ __all__ += [
     "dpnp_negative",
     "dpnp_power",
     "dpnp_prod",
-    "dpnp_remainder",
     "dpnp_sign",
     "dpnp_sum",
     "dpnp_trapz",
@@ -546,14 +545,6 @@ cpdef utils.dpnp_descriptor dpnp_prod(utils.dpnp_descriptor x1,
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_remainder(utils.dpnp_descriptor x1_obj,
-                                           utils.dpnp_descriptor x2_obj,
-                                           object dtype=None,
-                                           utils.dpnp_descriptor out=None,
-                                           object where=True):
-    return call_fptr_2in_1out(DPNP_FN_REMAINDER_EXT, x1_obj, x2_obj, dtype, out, where)
-
-
 cpdef utils.dpnp_descriptor dpnp_sign(utils.dpnp_descriptor x1):
     return call_fptr_1in_1out_strides(DPNP_FN_SIGN_EXT, x1)
 
diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -61,6 +61,7 @@
     "dpnp_logical_xor",
     "dpnp_multiply",
     "dpnp_not_equal",
+    "dpnp_remainder",
     "dpnp_sin",
     "dpnp_sqrt",
     "dpnp_square",
@@ -80,7 +81,7 @@ def check_nd_call_func(
     **kwargs,
 ):
     """
-    Checks arguments and calls function with a single input array.
+    Checks arguments and calls a function.
 
     Chooses a common internal elementwise function to call in DPNP based on input arguments
     or to fallback on NumPy call if any passed argument is not currently supported.
@@ -121,7 +122,6 @@ def check_nd_call_func(
                     order
                 )
             )
-
         return dpnp_func(*x_args, out=out, order=order)
     return call_origin(
         origin_func,
@@ -953,6 +953,66 @@ def dpnp_not_equal(x1, x2, out=None, order="K"):
     return dpnp_array._create_from_usm_ndarray(res_usm)
 
 
+_remainder_docstring_ = """
+remainder(x1, x2, out=None, order='K')
+
+Calculates the remainder of division for each element `x1_i` of the input array
+`x1` with the respective element `x2_i` of the input array `x2`.
+
+This function is equivalent to the Python modulus operator.
+
+Args:
+    x1 (dpnp.ndarray):
+        First input array, expected to have a real-valued data type.
+    x2 (dpnp.ndarray):
+        Second input array, also expected to have a real-valued data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the newly allocated output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    dpnp.ndarray:
+        an array containing the element-wise remainders. The data type of
+        the returned array is determined by the Type Promotion Rules.
+"""
+
+
+def dpnp_remainder(x1, x2, out=None, order="K"):
+    """
+    Computes the element-wise remainder of `x1` by `x2` as a dpnp array.
+
+    Uses the OneMKL VM pybind11 extension when it accepts the arguments and
+    falls back to the dpctl.tensor implementation of remainder() otherwise.
+    """
+
+    def _call_remainder(src1, src2, dst, sycl_queue, depends=None):
+        """Callback for BinaryElementwiseFunc: picks the OneMKL VM or dpctl call."""
+
+        if depends is None:
+            depends = []
+
+        if vmi._mkl_remainder_to_call(sycl_queue, src1, src2, dst):
+            # OneMKL VM path; unlike ti._remainder, the queue is passed first here
+            return vmi._remainder(sycl_queue, src1, src2, dst, depends)
+        return ti._remainder(src1, src2, dst, sycl_queue, depends)  # dpctl fallback
+
+    # unwrap the inputs: dpctl.tensor only works with usm_ndarray or scalar
+    x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1)
+    x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2)
+    out_usm = None if out is None else dpnp.get_usm_ndarray(out)
+
+    func = BinaryElementwiseFunc(
+        "remainder",
+        ti._remainder_result_type,
+        _call_remainder,
+        _remainder_docstring_,
+    )
+    res_usm = func(x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order)
+    return dpnp_array._create_from_usm_ndarray(res_usm)  # wrap as dpnp array
+
+
 _sin_docstring = """
 sin(x, out=None, order='K')
 Computes sine for each element `x_i` of input array `x`.
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py