Skip to content

dpnp.divide() doesn't work properly with a scalar #1295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Feb 16, 2023
Merged
1 change: 1 addition & 0 deletions dpnp/backend/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ string(CONCAT COMMON_COMPILE_FLAGS
"-fsycl "
"-fsycl-device-code-split=per_kernel "
"-fno-approx-func "
"-fno-finite-math-only "
)
string(CONCAT COMMON_LINK_FLAGS
"-fsycl "
Expand Down
8 changes: 4 additions & 4 deletions dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ MACRO_2ARG_3TYPES_OP(dpnp_copysign_c,

MACRO_2ARG_3TYPES_OP(dpnp_divide_c,
input1_elem / input2_elem,
nullptr,
std::false_type,
x1 / x2,
MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
oneapi::mkl::vm::div,
MACRO_UNPACK_TYPES(float, double))
MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))

MACRO_2ARG_3TYPES_OP(dpnp_fmod_c,
sycl::fmod((double)input1_elem, (double)input2_elem),
Expand Down Expand Up @@ -169,7 +169,7 @@ MACRO_2ARG_3TYPES_OP(dpnp_minimum_c,
// pytest "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3"
// requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10]
MACRO_2ARG_3TYPES_OP(dpnp_multiply_c,
input1_elem* input2_elem,
input1_elem * input2_elem,
x1 * x2,
MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
oneapi::mkl::vm::mul,
Expand Down
22 changes: 20 additions & 2 deletions dpnp/backend/include/dpnp_iface_fptr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,26 @@ size_t operator-(DPNPFuncType lhs, DPNPFuncType rhs);
*/
typedef struct DPNPFuncData
{
DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */
void* ptr; /**< C++ backend function pointer */
DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr, const DPNPFuncType type_no_fp64, void* ptr_no_fp64)
: return_type(gen_type)
, ptr(gen_ptr)
, return_type_no_fp64(type_no_fp64)
, ptr_no_fp64(ptr_no_fp64)
{
}
DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr)
: DPNPFuncData(gen_type, gen_ptr, DPNPFuncType::DPNP_FT_NONE, nullptr)
{
}
DPNPFuncData()
: DPNPFuncData(DPNPFuncType::DPNP_FT_NONE, nullptr)
{
}

DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */
void* ptr; /**< C++ backend function pointer */
DPNPFuncType return_type_no_fp64; /**< alternative return type identifier when no fp64 support by device */
void* ptr_no_fp64; /**< alternative C++ backend function pointer when no fp64 support by device */
} DPNPFuncData_t;

/**
Expand Down
122 changes: 82 additions & 40 deletions dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,26 +1029,50 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
\
if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size) \
{ \
sycl::vec<_DataType_input1, vec_sz> x1 = \
sg.load<vec_sz>(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \
sycl::vec<_DataType_input2, vec_sz> x2 = \
sg.load<vec_sz>(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start])); \
using input1_ptrT = sycl::multi_ptr<_DataType_input1, global_space>; \
using input2_ptrT = sycl::multi_ptr<_DataType_input2, global_space>; \
using result_ptrT = sycl::multi_ptr<_DataType_output, global_space>; \
\
sycl::vec<_DataType_output, vec_sz> res_vec; \
\
if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __vec_types__>) \
if constexpr (both_types_are_any_of<_DataType_input1, _DataType_input2, __vec_types__>) \
{ \
res_vec = __vec_operation__; \
if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, _DataType_output>) \
{ \
sycl::vec<_DataType_input1, vec_sz> x1 = \
sg.load<vec_sz>(input1_ptrT(&input1_data[start])); \
sycl::vec<_DataType_input2, vec_sz> x2 = \
sg.load<vec_sz>(input2_ptrT(&input2_data[start])); \
\
res_vec = __vec_operation__; \
} \
else /* input types don't match result type, so explicit casting is required */ \
{ \
sycl::vec<_DataType_output, vec_sz> x1 = \
dpnp_vec_cast<_DataType_output, _DataType_input1, vec_sz>( \
sg.load<vec_sz>(input1_ptrT(&input1_data[start]))); \
sycl::vec<_DataType_output, vec_sz> x2 = \
dpnp_vec_cast<_DataType_output, _DataType_input2, vec_sz>( \
sg.load<vec_sz>(input2_ptrT(&input2_data[start]))); \
\
res_vec = __vec_operation__; \
} \
} \
else \
{ \
sycl::vec<_DataType_input1, vec_sz> x1 = \
sg.load<vec_sz>(input1_ptrT(&input1_data[start])); \
sycl::vec<_DataType_input2, vec_sz> x2 = \
sg.load<vec_sz>(input2_ptrT(&input2_data[start])); \
\
for (size_t k = 0; k < vec_sz; ++k) \
{ \
const _DataType_output input1_elem = x1[k]; \
const _DataType_output input2_elem = x2[k]; \
res_vec[k] = __operation__; \
} \
} \
sg.store<vec_sz>(sycl::multi_ptr<_DataType_output, global_space>(&result[start]), res_vec); \
sg.store<vec_sz>(result_ptrT(&result[start]), res_vec); \
} \
else \
{ \
Expand Down Expand Up @@ -1173,6 +1197,47 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)

#include <dpnp_gen_2arg_3type_tbl.hpp>

template <DPNPFuncType FT1, DPNPFuncType FT2, typename has_fp64 = std::true_type>
static constexpr DPNPFuncType get_divide_res_type()
{
constexpr auto widest_type = populate_func_types<FT1, FT2>();
constexpr auto shortes_type = (widest_type == FT1) ? FT2 : FT1;

if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX128 || widest_type == DPNPFuncType::DPNP_FT_DOUBLE)
{
return widest_type;
}
else if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX64)
{
if constexpr (shortes_type == DPNPFuncType::DPNP_FT_DOUBLE)
{
return DPNPFuncType::DPNP_FT_CMPLX128;
}
else if constexpr (has_fp64::value &&
(shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG))
{
return DPNPFuncType::DPNP_FT_CMPLX128;
}
}
else if constexpr (widest_type == DPNPFuncType::DPNP_FT_FLOAT)
{
if constexpr (has_fp64::value &&
(shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG))
{
return DPNPFuncType::DPNP_FT_DOUBLE;
}
}
else if constexpr (has_fp64::value)
{
return DPNPFuncType::DPNP_FT_DOUBLE;
}
else
{
return DPNPFuncType::DPNP_FT_FLOAT;
}
return widest_type;
}

template <DPNPFuncType FT1, DPNPFuncType... FTs>
static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
{
Expand All @@ -1194,6 +1259,16 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
func_type_map_t::find_type<FT1>,
func_type_map_t::find_type<FTs>>}),
...);
((fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][FT1][FTs] =
{get_divide_res_type<FT1, FTs>(),
(void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs>()>,
func_type_map_t::find_type<FT1>,
func_type_map_t::find_type<FTs>>,
get_divide_res_type<FT1, FTs, std::false_type>(),
(void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs, std::false_type>()>,
func_type_map_t::find_type<FT1>,
func_type_map_t::find_type<FTs>>}),
...);
}

template <DPNPFuncType... FTs>
Expand Down Expand Up @@ -1402,39 +1477,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
fmap[DPNPFuncName::DPNP_FN_DIVIDE][eft_DBL][eft_DBL] = {eft_DBL,
(void*)dpnp_divide_c_default<double, double, double>};

fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_INT] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, int32_t, int32_t>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_LNG] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, int32_t, int64_t>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_FLT] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, int32_t, float>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_DBL] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, int32_t, double>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_INT] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, int64_t, int32_t>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_LNG] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, int64_t, int64_t>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_FLT] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, int64_t, float>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_DBL] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, int64_t, double>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_INT] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, float, int32_t>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_LNG] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, float, int64_t>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_FLT] = {eft_FLT,
(void*)dpnp_divide_c_ext<float, float, float>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_DBL] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, float, double>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_INT] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, double, int32_t>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_LNG] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, double, int64_t>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_FLT] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, double, float>};
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_DBL] = {eft_DBL,
(void*)dpnp_divide_c_ext<double, double, double>};

fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_INT] = {eft_INT,
(void*)dpnp_fmod_c_default<int32_t, int32_t, int32_t>};
fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_LNG] = {eft_LNG,
Expand Down
33 changes: 33 additions & 0 deletions dpnp/backend/src/dpnp_fptr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
#include <map>
#include <complex>

#include <CL/sycl.hpp>

#include <dpnp_iface_fptr.hpp>

/**
Expand Down Expand Up @@ -116,6 +118,31 @@ static constexpr DPNPFuncType populate_func_types()
return (FT1 < FT2) ? FT2 : FT1;
}

/**
* @brief A helper function to cast SYCL vector between types.
*/
template <typename Op, typename Vec, std::size_t... I>
static auto dpnp_vec_cast_impl(const Vec& v, std::index_sequence<I...>)
{
return Op{v[I]...};
}

/**
* @brief A casting function for SYCL vector.
*
* @tparam dstT A result type upon casting.
* @tparam srcT An incoming type of the vector.
* @tparam N A number of elements with the vector.
* @tparam Indices A sequence of integers
* @param s An incoming SYCL vector to cast.
* @return SYCL vector casted to desctination type.
*/
template <typename dstT, typename srcT, std::size_t N, typename Indices = std::make_index_sequence<N>>
static auto dpnp_vec_cast(const sycl::vec<srcT, N>& s)
{
return dpnp_vec_cast_impl<sycl::vec<dstT, N>, sycl::vec<srcT, N>>(s, Indices{});
}

/**
* Removes parentheses for a passed list of types separated by comma.
* It's intended to be used in operations macro.
Expand All @@ -142,6 +169,12 @@ struct are_same : std::conjunction<std::is_same<T, Ts>...> {};
template <typename T1, typename T2, typename... Ts>
constexpr auto both_types_are_same = std::conjunction_v<is_any<T1, Ts...>, are_same<T1, T2>>;

/**
* A template constat to check if both types T1 and T2 match any type from Ts.
*/
template <typename T1, typename T2, typename... Ts>
constexpr auto both_types_are_any_of = std::conjunction_v<is_any<T1, Ts...>, is_any<T2, Ts...>>;

/**
* A template constat to check if both types T1 and T2 don't match any type from Ts sequence.
*/
Expand Down
2 changes: 2 additions & 0 deletions dpnp/dpnp_algo/dpnp_algo.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,8 @@ cdef extern from "dpnp_iface_fptr.hpp":
struct DPNPFuncData:
DPNPFuncType return_type
void * ptr
DPNPFuncType return_type_no_fp64
void *ptr_no_fp64

DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except +

Expand Down
Loading