IntelPython · antonwolfy · Feb 16, 2023 · Feb 3, 2023 · Feb 7, 2023 · Feb 8, 2023
@@ -93,6 +93,7 @@ string(CONCAT COMMON_COMPILE_FLAGS
   "-fsycl "
   "-fsycl-device-code-split=per_kernel "
   "-fno-approx-func "
+  "-fno-finite-math-only "
 )
 string(CONCAT COMMON_LINK_FLAGS
   "-fsycl "

@@ -132,10 +132,10 @@ MACRO_2ARG_3TYPES_OP(dpnp_copysign_c,
 
 MACRO_2ARG_3TYPES_OP(dpnp_divide_c,
                      input1_elem / input2_elem,
-                     nullptr,
-                     std::false_type,
+                     x1 / x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
                      oneapi::mkl::vm::div,
-                     MACRO_UNPACK_TYPES(float, double))
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
 
 MACRO_2ARG_3TYPES_OP(dpnp_fmod_c,
                      sycl::fmod((double)input1_elem, (double)input2_elem),
@@ -169,7 +169,7 @@ MACRO_2ARG_3TYPES_OP(dpnp_minimum_c,
 // pytest "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3"
 // requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10]
 MACRO_2ARG_3TYPES_OP(dpnp_multiply_c,
-                     input1_elem* input2_elem,
+                     input1_elem * input2_elem,
                      x1 * x2,
                      MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
                      oneapi::mkl::vm::mul,

@@ -417,8 +417,26 @@ size_t operator-(DPNPFuncType lhs, DPNPFuncType rhs);
  */
 typedef struct DPNPFuncData
 {
-    DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */
-    void* ptr;                /**< C++ backend function pointer */
+    DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr, const DPNPFuncType type_no_fp64, void* ptr_no_fp64)
+        : return_type(gen_type)
+        , ptr(gen_ptr)
+        , return_type_no_fp64(type_no_fp64)
+        , ptr_no_fp64(ptr_no_fp64)
+    {
+    }
+    DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr)
+        : DPNPFuncData(gen_type, gen_ptr, DPNPFuncType::DPNP_FT_NONE, nullptr)
+    {
+    }
+    DPNPFuncData()
+        : DPNPFuncData(DPNPFuncType::DPNP_FT_NONE, nullptr)
+    {
+    }
+
+    DPNPFuncType return_type;         /**< return type identifier which expected by the @ref ptr function */
+    void* ptr;                        /**< C++ backend function pointer */
+    DPNPFuncType return_type_no_fp64; /**< alternative return type identifier when no fp64 support by device */
+    void* ptr_no_fp64;                /**< alternative C++ backend function pointer when no fp64 support by device */
 } DPNPFuncData_t;
 
 /**

@@ -1029,26 +1029,50 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                                                                                                                        \
                     if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size)                               \
                     {                                                                                                  \
-                        sycl::vec<_DataType_input1, vec_sz> x1 =                                                       \
-                            sg.load<vec_sz>(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start]));     \
-                        sycl::vec<_DataType_input2, vec_sz> x2 =                                                       \
-                            sg.load<vec_sz>(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start]));     \
+                        using input1_ptrT = sycl::multi_ptr<_DataType_input1, global_space>;                           \
+                        using input2_ptrT = sycl::multi_ptr<_DataType_input2, global_space>;                           \
+                        using result_ptrT = sycl::multi_ptr<_DataType_output, global_space>;                           \
+                                                                                                                       \
                         sycl::vec<_DataType_output, vec_sz> res_vec;                                                   \
                                                                                                                        \
-                        if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __vec_types__>)          \
+                        if constexpr (both_types_are_any_of<_DataType_input1, _DataType_input2, __vec_types__>)        \
                         {                                                                                              \
-                            res_vec = __vec_operation__;                                                               \
+                            if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, _DataType_output>)   \
+                            {                                                                                          \
+                                sycl::vec<_DataType_input1, vec_sz> x1 =                                               \
+                                    sg.load<vec_sz>(input1_ptrT(&input1_data[start]));                                 \
+                                sycl::vec<_DataType_input2, vec_sz> x2 =                                               \
+                                    sg.load<vec_sz>(input2_ptrT(&input2_data[start]));                                 \
+                                                                                                                       \
+                                res_vec = __vec_operation__;                                                           \
+                            }                                                                                          \
+                            else /* input types don't match result type, so explicit casting is required */            \
+                            {                                                                                          \
+                                sycl::vec<_DataType_output, vec_sz> x1 =                                               \
+                                    dpnp_vec_cast<_DataType_output, _DataType_input1, vec_sz>(                         \
+                                        sg.load<vec_sz>(input1_ptrT(&input1_data[start])));                            \
+                                sycl::vec<_DataType_output, vec_sz> x2 =                                               \
+                                    dpnp_vec_cast<_DataType_output, _DataType_input2, vec_sz>(                         \
+                                        sg.load<vec_sz>(input2_ptrT(&input2_data[start])));                            \
+                                                                                                                       \
+                                res_vec = __vec_operation__;                                                           \
+                            }                                                                                          \
                         }                                                                                              \
                         else                                                                                           \
                         {                                                                                              \
+                            sycl::vec<_DataType_input1, vec_sz> x1 =                                                   \
+                                sg.load<vec_sz>(input1_ptrT(&input1_data[start]));                                     \
+                            sycl::vec<_DataType_input2, vec_sz> x2 =                                                   \
+                                sg.load<vec_sz>(input2_ptrT(&input2_data[start]));                                     \
+                                                                                                                       \
                             for (size_t k = 0; k < vec_sz; ++k)                                                        \
                             {                                                                                          \
                                 const _DataType_output input1_elem = x1[k];                                            \
                                 const _DataType_output input2_elem = x2[k];                                            \
                                 res_vec[k] = __operation__;                                                            \
                             }                                                                                          \
                         }                                                                                              \
-                        sg.store<vec_sz>(sycl::multi_ptr<_DataType_output, global_space>(&result[start]), res_vec);    \
+                        sg.store<vec_sz>(result_ptrT(&result[start]), res_vec);                                        \
                     }                                                                                                  \
                     else                                                                                               \
                     {                                                                                                  \
@@ -1173,6 +1197,47 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
 
 #include <dpnp_gen_2arg_3type_tbl.hpp>
 
+template <DPNPFuncType FT1, DPNPFuncType FT2, typename has_fp64 = std::true_type>
+static constexpr DPNPFuncType get_divide_res_type()
+{
+    constexpr auto widest_type = populate_func_types<FT1, FT2>();
+    constexpr auto shortes_type = (widest_type == FT1) ? FT2 : FT1;
+
+    if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX128 || widest_type == DPNPFuncType::DPNP_FT_DOUBLE)
+    {
+        return widest_type;
+    }
+    else if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX64)
+    {
+        if constexpr (shortes_type == DPNPFuncType::DPNP_FT_DOUBLE)
+        {
+            return DPNPFuncType::DPNP_FT_CMPLX128;
+        }
+        else if constexpr (has_fp64::value &&
+                           (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG))
+        {
+            return DPNPFuncType::DPNP_FT_CMPLX128;
+        }
+    }
+    else if constexpr (widest_type == DPNPFuncType::DPNP_FT_FLOAT)
+    {
+        if constexpr (has_fp64::value &&
+                      (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG))
+        {
+            return DPNPFuncType::DPNP_FT_DOUBLE;
+        }
+    }
+    else if constexpr (has_fp64::value)
+    {
+        return DPNPFuncType::DPNP_FT_DOUBLE;
+    }
+    else
+    {
+        return DPNPFuncType::DPNP_FT_FLOAT;
+    }
+    return widest_type;
+}
+
 template <DPNPFuncType FT1, DPNPFuncType... FTs>
 static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
 {
@@ -1194,6 +1259,16 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
                                       func_type_map_t::find_type<FT1>,
                                       func_type_map_t::find_type<FTs>>}),
      ...);
+    ((fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][FT1][FTs] =
+          {get_divide_res_type<FT1, FTs>(),
+           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs>()>,
+                                    func_type_map_t::find_type<FT1>,
+                                    func_type_map_t::find_type<FTs>>,
+           get_divide_res_type<FT1, FTs, std::false_type>(),
+           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs, std::false_type>()>,
+                                    func_type_map_t::find_type<FT1>,
+                                    func_type_map_t::find_type<FTs>>}),
+     ...);
 }
 
 template <DPNPFuncType... FTs>
@@ -1402,39 +1477,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_DIVIDE][eft_DBL][eft_DBL] = {eft_DBL,
                                                             (void*)dpnp_divide_c_default<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_FLT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_FLT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_FLT] = {eft_FLT,
-                                                                (void*)dpnp_divide_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_INT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_LNG] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_FLT] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_DBL] = {eft_DBL,
-                                                                (void*)dpnp_divide_c_ext<double, double, double>};
-
     fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_INT] = {eft_INT,
                                                           (void*)dpnp_fmod_c_default<int32_t, int32_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_LNG] = {eft_LNG,

@@ -35,6 +35,8 @@
 #include <map>
 #include <complex>
 
+#include <CL/sycl.hpp>
+
 #include <dpnp_iface_fptr.hpp>
 
 /**
@@ -116,6 +118,31 @@ static constexpr DPNPFuncType populate_func_types()
     return (FT1 < FT2) ? FT2 : FT1;
 }
 
+/**
+ * @brief A helper function to cast SYCL vector between types.
+ */
+template <typename Op, typename Vec, std::size_t... I>
+static auto dpnp_vec_cast_impl(const Vec& v, std::index_sequence<I...>)
+{
+    return Op{v[I]...};
+}
+
+/**
+ * @brief A casting function for SYCL vector.
+ * 
+ * @tparam dstT A result type upon casting.
+ * @tparam srcT An incoming type of the vector.
+ * @tparam N A number of elements with the vector.
+ * @tparam Indices A sequence of integers
+ * @param s An incoming SYCL vector to cast.
+ * @return SYCL vector casted to desctination type.
+ */
+template <typename dstT, typename srcT, std::size_t N, typename Indices = std::make_index_sequence<N>>
+static auto dpnp_vec_cast(const sycl::vec<srcT, N>& s)
+{
+    return dpnp_vec_cast_impl<sycl::vec<dstT, N>, sycl::vec<srcT, N>>(s, Indices{});
+}
+
 /**
  * Removes parentheses for a passed list of types separated by comma.
  * It's intended to be used in operations macro.
@@ -142,6 +169,12 @@ struct are_same : std::conjunction<std::is_same<T, Ts>...> {};
 template <typename T1, typename T2, typename... Ts>
 constexpr auto both_types_are_same = std::conjunction_v<is_any<T1, Ts...>, are_same<T1, T2>>;
 
+/**
+ * A template constat to check if both types T1 and T2 match any type from Ts.
+ */
+template <typename T1, typename T2, typename... Ts>
+constexpr auto both_types_are_any_of = std::conjunction_v<is_any<T1, Ts...>, is_any<T2, Ts...>>;
+
 /**
  * A template constat to check if both types T1 and T2 don't match any type from Ts sequence.
  */

@@ -374,6 +374,8 @@ cdef extern from "dpnp_iface_fptr.hpp":
     struct DPNPFuncData:
         DPNPFuncType return_type
         void * ptr
+        DPNPFuncType return_type_no_fp64
+        void *ptr_no_fp64
 
     DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except +