From e26c3f1f3a92321f24cd3646d6a5de9635dc7821 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 3 Feb 2023 05:39:40 -0600 Subject: [PATCH 1/7] dpnp.add() doesn't work properly with a scalar --- tests/skipped_tests.tbl | 6 ++---- tests/skipped_tests_gpu.tbl | 34 ++++------------------------------ 2 files changed, 6 insertions(+), 34 deletions(-) diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 53bdec8af0a4..98ed90a6d890 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -765,16 +765,14 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_547_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_549_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input 
tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index af2dbd783a4e..c1bb4aa3b8fa 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -91,18 +91,7 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesInvalidValu tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_0_{shape=(3, 3)}::test_diag_indices_from tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_1_{shape=(0, 0)}::test_diag_indices_from tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_2_{shape=(2, 2, 2)}::test_diag_indices_from -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_383_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_439_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_447_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_463_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_519_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary + tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_all tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_axis tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_sum_all @@ -969,34 +958,19 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_279_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_287_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_359_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_367_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_383_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_439_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_447_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary + 
tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_463_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_519_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input 
tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract + tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float From 0b1345da3d35684aefee6775e32521573c7c4a00 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 7 Feb 2023 07:24:44 -0600 Subject: [PATCH 2/7] dpnp.subtract() doesn't work properly with a scalar --- .../include/dpnp_gen_2arg_3type_tbl.hpp | 6 +- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 39 ++------ dpnp/dpnp_array.py | 4 +- dpnp/dpnp_iface_mathematical.py | 83 +++++++++-------- tests/skipped_tests.tbl | 3 +- tests/skipped_tests_gpu.tbl | 4 - tests/test_arraycreation.py | 4 +- tests/test_mathematical.py | 89 ++++++++++++++----- 
.../cupy/creation_tests/test_from_data.py | 1 + .../cupy/math_tests/test_arithmetic.py | 48 +++++----- 10 files changed, 148 insertions(+), 133 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 33f5e0d19a46..6d8b0a49340e 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -184,9 +184,9 @@ MACRO_2ARG_3TYPES_OP(dpnp_power_c, MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, input1_elem - input2_elem, - nullptr, - std::false_type, + sycl::sub_sat(x1, x2), + MACRO_UNPACK_TYPES(int, long), oneapi::mkl::vm::sub, - MACRO_UNPACK_TYPES(float, double)) + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) #undef MACRO_2ARG_3TYPES_OP diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 32097d321a71..31da58c98ff0 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1193,6 +1193,12 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) func_type_map_t::find_type, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_subtract_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); } template @@ -1878,39 +1884,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_SUBTRACT][eft_DBL][eft_DBL] = { eft_DBL, (void*)dpnp_subtract_c_default}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_INT] = { - eft_INT, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - 
fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - func_map_elemwise_2arg_3type_helper(fmap); return; diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index a60464583cab..2843a044b044 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -270,7 +270,9 @@ def __rmul__(self, other): # '__rpow__', # '__rrshift__', # '__rshift__', - # '__rsub__', + + def __rsub__(self, other): + return dpnp.subtract(other, self) def __rtruediv__(self, other): return dpnp.true_divide(other, self) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index e254e916b846..41657fb2d593 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -215,7 +215,7 @@ def add(x1, if x1_desc and x2_desc: return dpnp_add(x1_desc, x2_desc, dtype=dtype, 
out=out, where=where).get_pyobj() - return call_origin(numpy.add, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def around(x1, decimals=0, out=None): @@ -1145,7 +1145,7 @@ def multiply(x1, if x1_desc and x2_desc: return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.multiply, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def nancumprod(x1, **kwargs): @@ -1520,60 +1520,67 @@ def sign(x1, **kwargs): return call_origin(numpy.sign, x1, **kwargs) -def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs): +def subtract(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Subtract arguments, element-wise. For full documentation refer to :obj:`numpy.subtract`. + Returns + ------- + y : dpnp.ndarray + The difference of `x1` and `x2`, element-wise. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
Example ------- - >>> import dpnp as np - >>> result = np.subtract(np.array([4, 3]), np.array([2, 7])) - >>> [x for x in result] + >>> import dpnp as dp + >>> result = dp.subtract(dp.array([4, 3]), dp.array([2, 7])) + >>> print(result) [2, -4] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x1_desc and x1_desc.dtype == dpnp.bool: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif x2_desc and x2_desc.dtype == dpnp.bool: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + if x1_desc.dtype == x2_desc.dtype == dpnp.bool: + raise TypeError("DPNP boolean subtract, the `-` operator, is not 
supported, " + "use the bitwise_xor, the `^` operator, or the logical_xor function instead.") + return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.subtract, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.subtract, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True): diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 98ed90a6d890..690908638b69 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -389,7 +389,7 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype -tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile + tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid2 @@ -773,7 +773,6 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNu tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index c1bb4aa3b8fa..932c939743ae 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -18,7 +18,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9] @@ -29,11 +28,9 @@ tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumprod-data4] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumsum-data5] -tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-diff-data6] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ediff1d-data7] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-fabs-data8] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-floor-data9] -tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-gradient-data10] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumprod-data11] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumsum-data12] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nanprod-data13] @@ -554,7 +551,6 @@ 
tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype -tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1 diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index 833ea6109c3c..7216f934b3ef 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -109,7 +109,7 @@ def test_frombuffer(dtype): @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) +@pytest.mark.parametrize("dtype", get_all_dtypes()) def test_fromfile(dtype): with tempfile.TemporaryFile() as fh: fh.write(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08") @@ -275,6 +275,7 @@ def test_tri_default_dtype(): '[[1, 2], [3, 4]]', '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]', '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]']) +# TODO: add fixture 'allow_fall_back_on_numpy' and remove operator.index() def test_tril(m, k): a = numpy.array(m) ia = dpnp.array(a) @@ -295,6 +296,7 @@ def test_tril(m, k): '[[1, 2], [3, 4]]', '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]', '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]']) +# TODO: add fixture 'allow_fall_back_on_numpy' and remove operator.index() def test_triu(m, k): a = numpy.array(m) ia = dpnp.array(a) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 70e0bd73dc50..6f7ee58c0380 100644 --- a/tests/test_mathematical.py +++ 
b/tests/test_mathematical.py @@ -66,7 +66,7 @@ def test_diff(array): @pytest.mark.parametrize("dtype1", get_all_dtypes()) @pytest.mark.parametrize("dtype2", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'multiply']) + ['add', 'multiply', 'subtract']) @pytest.mark.parametrize("data", [[[1, 2], [3, 4]]], ids=['[[1, 2], [3, 4]]']) @@ -77,9 +77,14 @@ def test_op_multiple_dtypes(dtype1, func, dtype2, data): np_b = numpy.array(data, dtype=dtype2) dpnp_b = dpnp.array(data, dtype=dtype2) - result = getattr(dpnp, func)(dpnp_a, dpnp_b) - expected = getattr(numpy, func)(np_a, np_b) - assert_array_equal(result, expected) + if func == 'subtract' and (dtype1 == dtype2 == dpnp.bool): + with pytest.raises(TypeError): + result = getattr(dpnp, func)(dpnp_a, dpnp_b) + expected = getattr(numpy, func)(np_a, np_b) + else: + result = getattr(dpnp, func)(dpnp_a, dpnp_b) + expected = getattr(numpy, func)(np_a, np_b) + assert_array_equal(result, expected) @pytest.mark.parametrize("rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3]) @@ -98,15 +103,20 @@ def array_or_scalar(xp, data, dtype=None): return xp.array(data, dtype=dtype) def _test_mathematical(self, name, dtype, lhs, rhs): - a = self.array_or_scalar(dpnp, lhs, dtype=dtype) - b = self.array_or_scalar(dpnp, rhs, dtype=dtype) - result = getattr(dpnp, name)(a, b) + a_dpnp = self.array_or_scalar(dpnp, lhs, dtype=dtype) + b_dpnp = self.array_or_scalar(dpnp, rhs, dtype=dtype) - a = self.array_or_scalar(numpy, lhs, dtype=dtype) - b = self.array_or_scalar(numpy, rhs, dtype=dtype) - expected = getattr(numpy, name)(a, b) + a_np = self.array_or_scalar(numpy, lhs, dtype=dtype) + b_np = self.array_or_scalar(numpy, rhs, dtype=dtype) - assert_allclose(result, expected, atol=1e-4) + if name == 'subtract' and not numpy.isscalar(rhs) and dtype == dpnp.bool: + with pytest.raises(TypeError): + result = getattr(dpnp, name)(a_dpnp, b_dpnp) + expected = getattr(numpy, name)(a_np, b_np) + else: + result = getattr(dpnp, name)(a_dpnp, 
b_dpnp) + expected = getattr(numpy, name)(a_np, b_np) + assert_allclose(result, expected, atol=1e-4) @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_add(self, dtype, lhs, rhs): @@ -166,8 +176,7 @@ def test_remainder(self, dtype, lhs, rhs): def test_power(self, dtype, lhs, rhs): self._test_mathematical('power', dtype, lhs, rhs) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_subtract(self, dtype, lhs, rhs): self._test_mathematical('subtract', dtype, lhs, rhs) @@ -177,7 +186,7 @@ def test_subtract(self, dtype, lhs, rhs): ids=['bool', 'int', 'float']) @pytest.mark.parametrize("data_type", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'multiply']) + ['add', 'multiply', 'subtract']) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -197,22 +206,28 @@ def test_op_with_scalar(array, val, func, data_type, val_type): dpnp_a = dpnp.array(array, dtype=data_type) val_ = val_type(val) - result = getattr(dpnp, func)(dpnp_a, val_) - expected = getattr(numpy, func)(np_a, val_) - assert_array_equal(result, expected) + if func == 'subtract' and val_type == bool and data_type == dpnp.bool: + with pytest.raises(TypeError): + result = getattr(dpnp, func)(dpnp_a, val_) + expected = getattr(numpy, func)(np_a, val_) - result = getattr(dpnp, func)(val_, dpnp_a) - expected = getattr(numpy, func)(val_, np_a) - assert_array_equal(result, expected) + result = getattr(dpnp, func)(val_, dpnp_a) + expected = getattr(numpy, func)(val_, np_a) + else: + result = getattr(dpnp, func)(dpnp_a, val_) + expected = getattr(numpy, func)(np_a, val_) + assert_array_equal(result, expected) + + result = getattr(dpnp, func)(val_, dpnp_a) + expected = getattr(numpy, func)(val_, np_a) + assert_array_equal(result, expected) @pytest.mark.parametrize("shape", [(), (3, 2)], ids=['()', '(3, 2)']) 
-@pytest.mark.parametrize("dtype", - [numpy.float32, numpy.float64], - ids=['numpy.float32', 'numpy.float64']) -def test_multiply_scalar2(shape, dtype): +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_multiply_scalar(shape, dtype): np_a = numpy.ones(shape, dtype=dtype) dpnp_a = dpnp.ones(shape, dtype=dtype) @@ -221,6 +236,32 @@ def test_multiply_scalar2(shape, dtype): assert_allclose(result, expected) +@pytest.mark.parametrize("shape", + [(), (3, 2)], + ids=['()', '(3, 2)']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_add_scalar(shape, dtype): + np_a = numpy.ones(shape, dtype=dtype) + dpnp_a = dpnp.ones(shape, dtype=dtype) + + result = 0.5 + dpnp_a + 1.7 + expected = 0.5 + np_a + 1.7 + assert_allclose(result, expected) + + +@pytest.mark.parametrize("shape", + [(), (3, 2)], + ids=['()', '(3, 2)']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_subtract_scalar(shape, dtype): + np_a = numpy.ones(shape, dtype=dtype) + dpnp_a = dpnp.ones(shape, dtype=dtype) + + result = 0.5 - dpnp_a - 1.7 + expected = 0.5 - np_a - 1.7 + assert_allclose(result, expected) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("array", [[1, 2, 3, 4, 5], [1, 2, numpy.nan, 4, 5], diff --git a/tests/third_party/cupy/creation_tests/test_from_data.py b/tests/third_party/cupy/creation_tests/test_from_data.py index e07d927b1cf0..ce71ef311a56 100644 --- a/tests/third_party/cupy/creation_tests/test_from_data.py +++ b/tests/third_party/cupy/creation_tests/test_from_data.py @@ -454,6 +454,7 @@ def test_asfortranarray_cuda_array_zero_dim_dtype( a = xp.ones((), dtype=dtype_a) return xp.asfortranarray(a, dtype=dtype_b) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_fromfile(self, xp): with tempfile.TemporaryFile() as fh: diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index a53a8494707c..21068ece8749 
100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -146,31 +146,27 @@ def check_binary(self, xp): y = y.astype(numpy.complex64) # NumPy returns an output array of another type than DPNP when input ones have diffrent types. - if self.name in ('add', 'multiply') and xp is cupy: - if xp.isscalar(arg1) and xp.isscalar(arg2): - # If both are scalars, the result will be a scalar, so needs to convert into numpy-scalar. - y = numpy.asarray(y) - elif dtype1 != dtype2: - is_array_arg1 = not xp.isscalar(arg1) - is_array_arg2 = not xp.isscalar(arg2) - - is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) - is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) - - if is_array_arg1 and is_array_arg2: - # If both inputs are arrays where one is of floating type and another - integer, - # NumPy will return an output array of always "float64" type, - # while DPNP will return the array of a wider type from the input arrays. - if is_int_float(dtype1, dtype2) or is_int_float(dtype2, dtype1): - y = y.astype(numpy.float64) - elif is_same_type(dtype1, dtype2, numpy.floating) or is_same_type(dtype1, dtype2, numpy.integer): - # If one input is an array and another - scalar, - # NumPy will return an output array of the same type as the inpupt array has, - # while DPNP will return the array of a wider type from the inputs (considering both array and scalar). 
- if is_array_arg1 and not is_array_arg2: - y = y.astype(dtype1) - elif is_array_arg2 and not is_array_arg1: - y = y.astype(dtype2) + if self.name in ('add', 'multiply', 'subtract') and xp is cupy and dtype1 != dtype2 and not self.use_dtype: + is_array_arg1 = not xp.isscalar(arg1) + is_array_arg2 = not xp.isscalar(arg2) + + is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) + is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) + + if is_array_arg1 and is_array_arg2: + # If both inputs are arrays where one is of floating type and another - integer, + # NumPy will return an output array of always "float64" type, + # while DPNP will return the array of a wider type from the input arrays. + if is_int_float(dtype1, dtype2) or is_int_float(dtype2, dtype1): + y = y.astype(numpy.float64) + elif is_same_type(dtype1, dtype2, numpy.floating) or is_same_type(dtype1, dtype2, numpy.integer): + # If one input is an array and another - scalar, + # NumPy will return an output array of the same type as the inpupt array has, + # while DPNP will return the array of a wider type from the inputs (considering both array and scalar). + if is_array_arg1 and not is_array_arg2: + y = y.astype(dtype1) + elif is_array_arg2 and not is_array_arg1: + y = y.astype(dtype2) # NumPy returns different values (nan/inf) on division by zero # depending on the architecture. 
@@ -188,7 +184,6 @@ def check_binary(self, xp): @testing.gpu @testing.parameterize(*( testing.product({ - # TODO(unno): boolean subtract causes DeprecationWarning in numpy>=1.13 'arg1': [testing.shaped_arange((2, 3), numpy, dtype=d) for d in all_types ] + [0, 0.0, 2, 2.0], @@ -283,7 +278,6 @@ def test_modf(self, xp, dtype): 'xp': [numpy, cupy], 'shape': [(3, 2), (), (3, 0, 2)] })) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestBoolSubtract(unittest.TestCase): From 8c33cbbafb903be0b1960721b1f5bd8ca712ca4f Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 8 Feb 2023 13:58:35 -0600 Subject: [PATCH 3/7] dpnp.divide() doesn't work properly with a scalar --- .../include/dpnp_gen_2arg_3type_tbl.hpp | 6 +- dpnp/backend/include/dpnp_iface_fptr.hpp | 22 ++++- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 84 +++++++++++-------- dpnp/dpnp_algo/dpnp_algo.pxd | 2 + dpnp/dpnp_algo/dpnp_algo.pyx | 14 +++- dpnp/dpnp_iface_mathematical.py | 69 ++++++++------- tests/conftest.py | 21 ++++- tests/helper.py | 2 +- tests/test_linalg.py | 71 ++++++++-------- tests/test_mathematical.py | 27 ++++-- tests/test_strides.py | 1 + .../cupy/math_tests/test_arithmetic.py | 38 +++++---- .../cupy/statistics_tests/test_meanvar.py | 5 +- 13 files changed, 227 insertions(+), 135 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 6d8b0a49340e..850f3c7213c2 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -132,10 +132,10 @@ MACRO_2ARG_3TYPES_OP(dpnp_copysign_c, MACRO_2ARG_3TYPES_OP(dpnp_divide_c, input1_elem / input2_elem, - nullptr, - std::false_type, + sycl::native::divide(x1, x2), + MACRO_UNPACK_TYPES(float, double), oneapi::mkl::vm::div, - MACRO_UNPACK_TYPES(float, double)) + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, sycl::fmod((double)input1_elem, 
(double)input2_elem), diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 61c1c9838ad6..0637bfcfc9bc 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -419,8 +419,26 @@ size_t operator-(DPNPFuncType lhs, DPNPFuncType rhs); */ typedef struct DPNPFuncData { - DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */ - void* ptr; /**< C++ backend function pointer */ + DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr, const DPNPFuncType type_no_fp64, void* ptr_no_fp64) + : return_type(gen_type) + , ptr(gen_ptr) + , return_type_no_fp64(type_no_fp64) + , ptr_no_fp64(ptr_no_fp64) + { + } + DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr) + : DPNPFuncData(gen_type, gen_ptr, DPNPFuncType::DPNP_FT_NONE, nullptr) + { + } + DPNPFuncData() + : DPNPFuncData(DPNPFuncType::DPNP_FT_NONE, nullptr) + { + } + + DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */ + void* ptr; /**< C++ backend function pointer */ + DPNPFuncType return_type_no_fp64; /**< alternative return type identifier when no fp64 support by device */ + void* ptr_no_fp64; /**< alternative C++ backend function pointer when no fp64 support by device */ } DPNPFuncData_t; /** diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 31da58c98ff0..097cab021d6c 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1178,6 +1178,47 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) #include +template +static constexpr DPNPFuncType get_divide_res_type() +{ + constexpr auto widest_type = populate_func_types(); + constexpr auto shortes_type = (widest_type == FT1) ? 
FT2 : FT1; + + if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX128 || widest_type == DPNPFuncType::DPNP_FT_DOUBLE) + { + return widest_type; + } + else if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX64) + { + if constexpr (shortes_type == DPNPFuncType::DPNP_FT_DOUBLE) + { + return DPNPFuncType::DPNP_FT_CMPLX128; + } + else if constexpr (has_fp64::value && + (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG)) + { + return DPNPFuncType::DPNP_FT_CMPLX128; + } + } + else if constexpr (widest_type == DPNPFuncType::DPNP_FT_FLOAT) + { + if constexpr (has_fp64::value && + (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG)) + { + return DPNPFuncType::DPNP_FT_DOUBLE; + } + } + else if constexpr (has_fp64::value) + { + return DPNPFuncType::DPNP_FT_DOUBLE; + } + else + { + return DPNPFuncType::DPNP_FT_FLOAT; + } + return widest_type; +} + template static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) { @@ -1199,6 +1240,16 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) func_type_map_t::find_type, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][FT1][FTs] = + {get_divide_res_type(), + (void*)dpnp_divide_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_divide_res_type(), + (void*)dpnp_divide_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); } template @@ -1407,39 +1458,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_DIVIDE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_divide_c_default}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_FLT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - 
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_FLT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_FLT] = {eft_FLT, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_FLT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_fmod_c_default}; fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_LNG] = {eft_LNG, diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 485e8adb1a66..65e07a9c7046 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -374,6 +374,8 @@ cdef extern from "dpnp_iface_fptr.hpp": struct DPNPFuncData: DPNPFuncType return_type void * ptr + DPNPFuncType return_type_no_fp64 + void *ptr_no_fp64 DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except + diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx 
index aaa7334e18a8..54e71b87d767 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -481,8 +481,6 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, # get the FPTR data structure cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(fptr_name, x1_c_type, x2_c_type) - result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type) - # Create result array cdef shape_type_c x1_shape = x1_obj.shape @@ -495,6 +493,15 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj) + # get FPTR function and result type + cdef fptr_2in_1out_strides_t func = NULL + if fptr_name != DPNP_FN_DIVIDE_EXT or result_sycl_device.has_aspect_fp64: + result_type = dpnp_DPNPFuncType_to_dtype(< size_t > kernel_data.return_type) + func = < fptr_2in_1out_strides_t > kernel_data.ptr + else: + result_type = dpnp_DPNPFuncType_to_dtype(< size_t > kernel_data.return_type_no_fp64) + func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64 + if out is None: """ Create result array with type given by FPTR data """ result = utils.create_output_descriptor(result_shape, @@ -517,11 +524,10 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, result_obj = result.get_array() - cdef c_dpctl.SyclQueue q = result_obj.sycl_queue + cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > result_obj.sycl_queue cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() """ Call FPTR function """ - cdef fptr_2in_1out_strides_t func = kernel_data.ptr cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), result.size, diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 41657fb2d593..af57819827c9 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -544,55 +544,64 @@ def diff(x1, n=1, axis=-1, prepend=numpy._NoValue, 
append=numpy._NoValue): return call_origin(numpy.diff, x1, n=n, axis=axis, prepend=prepend, append=append) -def divide(x1, x2, dtype=None, out=None, where=True, **kwargs): +def divide(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Divide arguments element-wise. For full documentation refer to :obj:`numpy.divide`. + Returns + ------- + y : dpnp.ndarray + The quotient ``x1/x2``, element-wise. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
Examples -------- - >>> import dpnp as np - >>> result = np.divide(np.array([1, -2, 6, -9]), np.array([-2, -2, -2, -2])) - >>> [x for x in result] + >>> import dpnp as dp + >>> result = dp.divide(dp.array([1, -2, 6, -9]), dp.array([-2, -2, -2, -2])) + >>> print(result) [-0.5, 1.0, -3.0, 4.5] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: return dpnp_divide(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.divide, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.divide, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def ediff1d(x1, to_end=None, to_begin=None): diff --git a/tests/conftest.py b/tests/conftest.py index 
78d3180bac08..22276f125f26 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -77,3 +77,22 @@ def pytest_collection_modifyitems(config, items): @pytest.fixture def allow_fall_back_on_numpy(monkeypatch): monkeypatch.setattr(dpnp.config, '__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__', 0) + +@pytest.fixture +def suppress_divide_numpy_warnings(): + # divide: treatment for division by zero (infinite result obtained from finite numbers) + old_settings = numpy.seterr(divide='ignore') + yield + numpy.seterr(**old_settings) # reset to default + +@pytest.fixture +def suppress_invalid_numpy_warnings(): + # invalid: treatment for invalid floating-point operation + # (result is not an expressible number, typically indicates that a NaN was produced) + old_settings = numpy.seterr(invalid='ignore') + yield + numpy.seterr(**old_settings) # reset to default + +@pytest.fixture +def suppress_divide_invalid_numpy_warnings(suppress_divide_numpy_warnings, suppress_invalid_numpy_warnings): + yield diff --git a/tests/helper.py b/tests/helper.py index be550a995dce..17c62cecd289 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -32,7 +32,7 @@ def get_all_dtypes(no_bool=False, dtypes.append(dpnp.complex64) if dev.has_aspect_fp64: dtypes.append(dpnp.complex128) - + # add None value to validate a default dtype if not no_none: dtypes.append(None) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index ac8392d15384..d9784a41558f 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -1,9 +1,15 @@ import pytest +from .helper import get_all_dtypes import dpnp as inp import dpctl + import numpy +from numpy.testing import ( + assert_allclose, + assert_array_equal +) def 
vvsort(val, vec, size, xp): @@ -49,7 +55,7 @@ def test_cholesky(array): ia = inp.array(a) result = inp.linalg.cholesky(ia) expected = numpy.linalg.cholesky(a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("arr", @@ -63,7 +69,7 @@ def test_cond(arr, p): ia = inp.array(a) result = inp.linalg.cond(ia, p) expected = numpy.linalg.cond(a, p) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array", @@ -82,13 +88,11 @@ def test_det(array): ia = inp.array(a) result = inp.linalg.det(ia) expected = numpy.linalg.det(a) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("size", [2, 4, 8, 16, 300]) def test_eig_arange(type, size): @@ -115,21 +119,19 @@ def test_eig_arange(type, size): if np_vec[0, i] * dpnp_vec[0, i] < 0: np_vec[:, i] = -np_vec[:, i] - numpy.testing.assert_array_equal(symm_orig, symm) - numpy.testing.assert_array_equal(dpnp_symm_orig, dpnp_symm) + assert_array_equal(symm_orig, symm) + assert_array_equal(dpnp_symm_orig, dpnp_symm) assert (dpnp_val.dtype == np_val.dtype) assert (dpnp_vec.dtype == np_vec.dtype) assert (dpnp_val.shape == np_val.shape) assert (dpnp_vec.shape == np_vec.shape) - numpy.testing.assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05) - numpy.testing.assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05) + assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05) + assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) 
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_eigvals(type): if dpctl.get_current_device_type() != dpctl.device_type.gpu: pytest.skip("eigvals function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005") @@ -144,12 +146,10 @@ def test_eigvals(type): ia = inp.array(a) result = inp.linalg.eigvals(ia) expected = numpy.linalg.eigvals(a) - numpy.testing.assert_allclose(expected, result, atol=0.5) + assert_allclose(expected, result, atol=0.5) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("array", [[[1., 2.], [3., 4.]], [[0, 1, 2], [3, 2, -1], [4, -2, 3]]], ids=['[[1., 2.], [3., 4.]]', '[[0, 1, 2], [3, 2, -1], [4, -2, 3]]']) @@ -158,12 +158,10 @@ def test_inv(type, array): ia = inp.array(a) result = inp.linalg.inv(ia) expected = numpy.linalg.inv(a) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) @pytest.mark.parametrize("array", [[0, 0], [0, 1], [1, 2], [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]]], ids=['[0, 0]', '[0, 1]', '[1, 2]', '[[0, 0], [0, 0]]', '[[1, 2], [1, 2]]', '[[1, 2], [3, 4]]']) @@ -177,10 +175,11 @@ def test_matrix_rank(type, tol, array): result = inp.linalg.matrix_rank(ia, tol=tol) expected = numpy.linalg.matrix_rank(a, tol=tol) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") +@pytest.mark.usefixtures("suppress_divide_numpy_warnings") @pytest.mark.parametrize("array", [[7], [1, 2], [1, 0]], ids=['[7]', '[1, 2]', '[1, 0]']) 
@@ -195,7 +194,7 @@ def test_norm1(array, ord, axis): ia = inp.array(a) result = inp.linalg.norm(ia, ord=ord, axis=axis) expected = numpy.linalg.norm(a, ord=ord, axis=axis) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -213,7 +212,7 @@ def test_norm2(array, ord, axis): ia = inp.array(a) result = inp.linalg.norm(ia, ord=ord, axis=axis) expected = numpy.linalg.norm(a, ord=ord, axis=axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -231,13 +230,11 @@ def test_norm3(array, ord, axis): ia = inp.array(a) result = inp.linalg.norm(ia, ord=ord, axis=axis) expected = numpy.linalg.norm(a, ord=ord, axis=axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(2, 2), (3, 4), (5, 3), (16, 16)], ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)']) @@ -262,7 +259,7 @@ def test_qr(type, shape, mode): tol = 1e-11 # check decomposition - numpy.testing.assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol) + assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol) # NP change sign for comparison ncols = min(a.shape[0], a.shape[1]) @@ -273,14 +270,12 @@ def test_qr(type, shape, mode): np_r[i, :] = -np_r[i, :] if numpy.any(numpy.abs(np_r[i, :]) > tol): - numpy.testing.assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol) + assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol) - numpy.testing.assert_allclose(dpnp_r, np_r, rtol=tol, 
atol=tol) + assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(2, 2), (3, 4), (5, 3), (16, 16)], ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)']) @@ -309,10 +304,10 @@ def test_svd(type, shape): dpnp_diag_s[i, i] = dpnp_s[i] # check decomposition - numpy.testing.assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol) + assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol) # compare singular values - # numpy.testing.assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol) + # assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol) # change sign of vectors for i in range(min(shape[0], shape[1])): @@ -322,5 +317,5 @@ def test_svd(type, shape): # compare vectors for non-zero values for i in range(numpy.count_nonzero(np_s > tol)): - numpy.testing.assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol) - numpy.testing.assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol) + assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol) + assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 6f7ee58c0380..78f628908337 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -66,7 +66,7 @@ def test_diff(array): @pytest.mark.parametrize("dtype1", get_all_dtypes()) @pytest.mark.parametrize("dtype2", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'multiply', 'subtract']) + ['add', 'multiply', 'subtract', 'divide']) @pytest.mark.parametrize("data", [[[1, 2], [3, 4]]], ids=['[[1, 2], [3, 4]]']) @@ -132,8 +132,7 @@ def test_arctan2(self, dtype, lhs, rhs): def test_copysign(self, dtype, lhs, rhs): 
self._test_mathematical('copysign', dtype, lhs, rhs) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_divide(self, dtype, lhs, rhs): self._test_mathematical('divide', dtype, lhs, rhs) @@ -181,12 +180,13 @@ def test_subtract(self, dtype, lhs, rhs): self._test_mathematical('subtract', dtype, lhs, rhs) +@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings") @pytest.mark.parametrize("val_type", [bool, int, float], ids=['bool', 'int', 'float']) @pytest.mark.parametrize("data_type", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'multiply', 'subtract']) + ['add', 'multiply', 'subtract', 'divide']) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -216,11 +216,11 @@ def test_op_with_scalar(array, val, func, data_type, val_type): else: result = getattr(dpnp, func)(dpnp_a, val_) expected = getattr(numpy, func)(np_a, val_) - assert_array_equal(result, expected) + assert_allclose(result, expected) result = getattr(dpnp, func)(val_, dpnp_a) expected = getattr(numpy, func)(val_, np_a) - assert_array_equal(result, expected) + assert_allclose(result, expected) @pytest.mark.parametrize("shape", @@ -262,6 +262,19 @@ def test_subtract_scalar(shape, dtype): assert_allclose(result, expected) +@pytest.mark.parametrize("shape", + [(), (3, 2)], + ids=['()', '(3, 2)']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_divide_scalar(shape, dtype): + np_a = numpy.ones(shape, dtype=dtype) + dpnp_a = dpnp.ones(shape, dtype=dtype) + + result = 0.5 / dpnp_a / 1.7 + expected = 0.5 / np_a / 1.7 + assert_allclose(result, expected) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("array", [[1, 2, 3, 4, 5], [1, 2, numpy.nan, 4, 5], @@ -442,7 +455,6 @@ def test_cross_3x3(self, x1, x2, axisa, axisb, axisc, axis): assert_array_equal(expected, result) 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestGradient: @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9], @@ -456,6 +468,7 @@ def test_gradient_y1(self, array): expected = numpy.gradient(np_y) assert_array_equal(expected, result) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9], [3., 4., 7.5, 9.], [2, 6, 8, 10]]) diff --git a/tests/test_strides.py b/tests/test_strides.py index 3c0d86a44a5a..02e8c8689757 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -37,6 +37,7 @@ def test_strides(func_name, dtype): assert_allclose(expected, result) +@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings") @pytest.mark.parametrize("func_name", ["arccos", "arccosh", "arcsin", "arcsinh", "arctan", "arctanh", "cbrt", "ceil", "copy", "cos", "cosh", "conjugate", "degrees", "ediff1d", "exp", "exp2", "expm1", "fabs", "floor", "log", diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 21068ece8749..027722d8bef2 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -146,27 +146,35 @@ def check_binary(self, xp): y = y.astype(numpy.complex64) # NumPy returns an output array of another type than DPNP when input ones have diffrent types. 
- if self.name in ('add', 'multiply', 'subtract') and xp is cupy and dtype1 != dtype2 and not self.use_dtype: + if xp is cupy and dtype1 != dtype2 and not self.use_dtype: is_array_arg1 = not xp.isscalar(arg1) is_array_arg2 = not xp.isscalar(arg2) is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) - if is_array_arg1 and is_array_arg2: - # If both inputs are arrays where one is of floating type and another - integer, - # NumPy will return an output array of always "float64" type, - # while DPNP will return the array of a wider type from the input arrays. - if is_int_float(dtype1, dtype2) or is_int_float(dtype2, dtype1): - y = y.astype(numpy.float64) - elif is_same_type(dtype1, dtype2, numpy.floating) or is_same_type(dtype1, dtype2, numpy.integer): - # If one input is an array and another - scalar, - # NumPy will return an output array of the same type as the inpupt array has, - # while DPNP will return the array of a wider type from the inputs (considering both array and scalar). - if is_array_arg1 and not is_array_arg2: - y = y.astype(dtype1) - elif is_array_arg2 and not is_array_arg1: - y = y.astype(dtype2) + if self.name in ('add', 'multiply', 'subtract'): + if is_array_arg1 and is_array_arg2: + # If both inputs are arrays where one is of floating type and another - integer, + # NumPy will return an output array of always "float64" type, + # while DPNP will return the array of a wider type from the input arrays. 
+ if is_int_float(dtype1, dtype2) or is_int_float(dtype2, dtype1): + y = y.astype(numpy.float64) + elif is_same_type(dtype1, dtype2, numpy.floating) or is_same_type(dtype1, dtype2, numpy.integer): + # If one input is an array and another - scalar, + # NumPy will return an output array of the same type as the inpupt array has, + # while DPNP will return the array of a wider type from the inputs (considering both array and scalar). + if is_array_arg1 and not is_array_arg2: + y = y.astype(dtype1) + elif is_array_arg2 and not is_array_arg1: + y = y.astype(dtype2) + elif self.name in ('divide', 'true_divide'): + # If one input is an array of float32 and another - an integer or floating scalar, + # NumPy will return an output array of float32, while DPNP will return the array of float64, + # since NumPy would use the same float64 type when instead of scalar here is array of integer of floating type. + if not (is_array_arg1 and is_array_arg2): + if (is_array_arg1 and arg1.dtype == numpy.float32) ^ (is_array_arg2 and arg2.dtype == numpy.float32): + y = y.astype(numpy.float32) # NumPy returns different values (nan/inf) on division by zero # depending on the architecture. 
diff --git a/tests/third_party/cupy/statistics_tests/test_meanvar.py b/tests/third_party/cupy/statistics_tests/test_meanvar.py index aea22d02c511..60d3413b0daa 100644 --- a/tests/third_party/cupy/statistics_tests/test_meanvar.py +++ b/tests/third_party/cupy/statistics_tests/test_meanvar.py @@ -89,7 +89,6 @@ def test_median_axis_sequence(self, xp, dtype): return xp.median(a, self.axis, keepdims=self.keepdims) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestAverage(unittest.TestCase): @@ -101,12 +100,14 @@ def test_average_all(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) return xp.average(a) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_average_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) return xp.average(a, axis=1) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_average_weights(self, xp, dtype): @@ -114,6 +115,7 @@ def test_average_weights(self, xp, dtype): w = testing.shaped_arange((2, 3), xp, dtype) return xp.average(a, weights=w) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_average_axis_weights(self, xp, dtype): @@ -132,6 +134,7 @@ def check_returned(self, a, axis, weights): testing.assert_allclose(average_cpu, average_gpu) testing.assert_allclose(sum_weights_cpu, sum_weights_gpu) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() def test_returned(self, dtype): a = testing.shaped_arange((2, 3), numpy, dtype) From 0b3ce153bc2e223ab0833bb3ae73b441ce24b2ff Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 13 Feb 2023 09:36:28 -0600 Subject: [PATCH 4/7] dpnp.divide() doesn't work properly with a scalar --- .../include/dpnp_gen_2arg_3type_tbl.hpp | 18 +-- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 139 ++++++++++-------- 
dpnp/backend/src/dpnp_fptr.hpp | 33 +++++ dpnp/dpnp_iface_mathematical.py | 20 ++- tests/test_usm_type.py | 26 ++++ 5 files changed, 159 insertions(+), 77 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 850f3c7213c2..148a0a9e646f 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -111,8 +111,8 @@ MACRO_2ARG_3TYPES_OP(dpnp_add_c, input1_elem + input2_elem, - sycl::add_sat(x1, x2), - MACRO_UNPACK_TYPES(int, long), + x1 + x2, + MACRO_UNPACK_TYPES(int, long, bool), oneapi::mkl::vm::add, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) @@ -132,8 +132,8 @@ MACRO_2ARG_3TYPES_OP(dpnp_copysign_c, MACRO_2ARG_3TYPES_OP(dpnp_divide_c, input1_elem / input2_elem, - sycl::native::divide(x1, x2), - MACRO_UNPACK_TYPES(float, double), + x1 / x2, + MACRO_UNPACK_TYPES(int, long, bool), oneapi::mkl::vm::div, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) @@ -169,9 +169,9 @@ MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, // pytest "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3" // requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10] MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, - input1_elem* input2_elem, - nullptr, - std::false_type, + input1_elem * input2_elem, + x1 * x2, + MACRO_UNPACK_TYPES(int, long, bool), oneapi::mkl::vm::mul, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) @@ -184,8 +184,8 @@ MACRO_2ARG_3TYPES_OP(dpnp_power_c, MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, input1_elem - input2_elem, - sycl::sub_sat(x1, x2), - MACRO_UNPACK_TYPES(int, long), + x1 - x2, + MACRO_UNPACK_TYPES(int, long, bool), oneapi::mkl::vm::sub, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 097cab021d6c..6f6816012eb7 100644 --- 
a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -881,9 +881,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) \ sycl::queue q = *(reinterpret_cast(q_ref)); \ \ - _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast(input1_in)); \ - _DataType_input2* input2_data = static_cast<_DataType_input2 *>(const_cast(input2_in)); \ - _DataType_output* result = static_cast<_DataType_output *>(result_out); \ + _DataType_input1* input1_data = static_cast<_DataType_input1*>(const_cast(input1_in)); \ + _DataType_input2* input2_data = static_cast<_DataType_input2*>(const_cast(input2_in)); \ + _DataType_output* result = static_cast<_DataType_output*>(result_out); \ \ bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \ \ @@ -896,8 +896,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim]; \ \ get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets); \ - use_strides = \ - use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); \ + use_strides = use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); \ delete[] input2_shape_offsets; \ \ sycl::event event; \ @@ -907,19 +906,17 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) { \ DPNPC_id<_DataType_input1>* input1_it; \ const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); \ - input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ - input1_it_size_in_bytes)); \ - new (input1_it) \ - DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ + input1_it = \ + reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes)); \ + new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ \ 
input1_it->broadcast_to_shape(result_shape, result_ndim); \ \ DPNPC_id<_DataType_input2>* input2_it; \ const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); \ - input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ - input2_it_size_in_bytes)); \ - new (input2_it) \ - DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ + input2_it = \ + reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes)); \ + new (input2_it) DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ \ input2_it->broadcast_to_shape(result_shape, result_ndim); \ \ @@ -957,27 +954,26 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) using usm_host_allocatorT = sycl::usm_allocator; \ \ size_t strides_size = 3 * result_ndim; \ - shape_elem_type *dev_strides_data = sycl::malloc_device(strides_size, q); \ + shape_elem_type* dev_strides_data = sycl::malloc_device(strides_size, q); \ \ /* create host temporary for packed strides managed by shared pointer */ \ - auto strides_host_packed = std::vector(strides_size, \ - usm_host_allocatorT(q)); \ + auto strides_host_packed = \ + std::vector(strides_size, usm_host_allocatorT(q)); \ \ /* packed vector is concatenation of result_strides, input1_strides and input2_strides */ \ std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim); \ \ - auto copy_strides_ev = q.copy(strides_host_packed.data(), \ - dev_strides_data, \ - strides_host_packed.size()); \ + auto copy_strides_ev = \ + q.copy(strides_host_packed.data(), dev_strides_data, strides_host_packed.size()); \ \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ const size_t output_id = global_id[0]; /* for (size_t i = 0; i < 
result_size; ++i) */ \ { \ - const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type *input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ @@ -1013,8 +1009,10 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) { \ event = __mkl_operation__(q, result_size, input1_data, input2_data, result); \ } \ - else if constexpr (none_of_both_types<_DataType_input1, _DataType_input2, \ - std::complex, std::complex>) \ + else if constexpr (none_of_both_types<_DataType_input1, \ + _DataType_input2, \ + std::complex, \ + std::complex>) \ { \ constexpr size_t lws = 64; \ constexpr unsigned int vec_sz = 8; \ @@ -1026,22 +1024,47 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ auto sg = nd_it.get_sub_group(); \ const auto max_sg_size = sg.get_max_local_range()[0]; \ - const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ - sg.get_group_id()[0] * max_sg_size); \ + const size_t start = \ + vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); \ \ if (start + static_cast(vec_sz) * max_sg_size < result_size) \ { \ - sycl::vec<_DataType_input1, vec_sz> x1 = \ - sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ - sycl::vec<_DataType_input2, vec_sz> x2 = \ - sg.load(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start])); \ + using input1_ptrT = sycl::multi_ptr<_DataType_input1, global_space>; \ + using input2_ptrT = sycl::multi_ptr<_DataType_input2, global_space>; \ + using result_ptrT = sycl::multi_ptr<_DataType_output, 
global_space>; \ + \ sycl::vec<_DataType_output, vec_sz> res_vec; \ - if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __vec_types__>) \ + \ + if constexpr (both_types_are_any_of<_DataType_input1, _DataType_input2, __vec_types__>) \ { \ - res_vec = __vec_operation__; \ + if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, _DataType_output>) \ + { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(input1_ptrT(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> x2 = \ + sg.load(input2_ptrT(&input2_data[start])); \ + \ + res_vec = __vec_operation__; \ + } \ + else /* input types don't match result type, so explicit casting is required */ \ + { \ + sycl::vec<_DataType_output, vec_sz> x1 = \ + dpnp_vec_cast<_DataType_output, _DataType_input1, vec_sz>( \ + sg.load(input1_ptrT(&input1_data[start]))); \ + sycl::vec<_DataType_output, vec_sz> x2 = \ + dpnp_vec_cast<_DataType_output, _DataType_input2, vec_sz>( \ + sg.load(input2_ptrT(&input2_data[start]))); \ + \ + res_vec = __vec_operation__; \ + } \ } \ else \ { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(input1_ptrT(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> x2 = \ + sg.load(input2_ptrT(&input2_data[start])); \ + \ for (size_t k = 0; k < vec_sz; ++k) \ { \ const _DataType_output input1_elem = x1[k]; \ @@ -1049,8 +1072,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) res_vec[k] = __operation__; \ } \ } \ - sg.store(sycl::multi_ptr<_DataType_output, global_space>(&result[start]), res_vec); \ - \ + sg.store(result_ptrT(&result[start]), res_vec); \ } \ else \ { \ @@ -1064,8 +1086,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) }; \ \ auto kernel_func = [&](sycl::handler& cgh) { \ - sycl::stream out(65536, 128, cgh);\ - cgh.parallel_for>(\ + cgh.parallel_for< \ + class __name__##_sg_kernel<_DataType_output, _DataType_input1, _DataType_input2>>( \ sycl::nd_range<1>(gws_range, lws_range), 
kernel_parallel_for_func); \ }; \ event = q.submit(kernel_func); \ @@ -1078,7 +1100,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const _DataType_output input1_elem = input1_data[i]; \ const _DataType_output input2_elem = input2_data[i]; \ result[i] = __operation__; \ - \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ cgh.parallel_for>( \ @@ -1112,26 +1133,25 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) { \ DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); \ DPCTLEventVectorRef dep_event_vec_ref = nullptr; \ - DPCTLSyclEventRef event_ref = __name__<_DataType_output, _DataType_input1, _DataType_input2>( \ - q_ref, \ - result_out, \ - result_size, \ - result_ndim, \ - result_shape, \ - result_strides, \ - input1_in, \ - input1_size, \ - input1_ndim, \ - input1_shape, \ - input1_strides, \ - input2_in, \ - input2_size, \ - input2_ndim, \ - input2_shape, \ - input2_strides, \ - where, \ - dep_event_vec_ref \ - ); \ + DPCTLSyclEventRef event_ref = \ + __name__<_DataType_output, _DataType_input1, _DataType_input2>(q_ref, \ + result_out, \ + result_size, \ + result_ndim, \ + result_shape, \ + result_strides, \ + input1_in, \ + input1_size, \ + input1_ndim, \ + input1_shape, \ + input1_strides, \ + input2_in, \ + input2_size, \ + input2_ndim, \ + input2_shape, \ + input2_strides, \ + where, \ + dep_event_vec_ref); \ DPCTLEvent_WaitAndThrow(event_ref); \ DPCTLEvent_Delete(event_ref); \ } \ @@ -1172,9 +1192,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const shape_elem_type*, \ const shape_elem_type*, \ const size_t*, \ - const DPCTLEventVectorRef) = __name__<_DataType_output, \ - _DataType_input1, \ - _DataType_input2>; + const DPCTLEventVectorRef) = \ + __name__<_DataType_output, _DataType_input1, _DataType_input2>; #include diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 4cb664858319..742e6dff3783 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ 
b/dpnp/backend/src/dpnp_fptr.hpp @@ -35,6 +35,8 @@ #include #include +#include + #include /** @@ -116,6 +118,31 @@ static constexpr DPNPFuncType populate_func_types() return (FT1 < FT2) ? FT2 : FT1; } +/** + * @brief A helper function to cast SYCL vector between types. + */ +template +static auto dpnp_vec_cast_impl(const Vec& v, std::index_sequence) +{ + return Op{v[I]...}; +} + +/** + * @brief A casting function for SYCL vector. + * + * @tparam dstT A result type upon casting. + * @tparam srcT An incoming type of the vector. + * @tparam N A number of elements in the vector. + * @tparam Indices A sequence of integers + * @param s An incoming SYCL vector to cast. + * @return SYCL vector casted to destination type. + */ +template > +static auto dpnp_vec_cast(const sycl::vec& s) +{ + return dpnp_vec_cast_impl, sycl::vec>(s, Indices{}); +} + /** * Removes parentheses for a passed list of types separated by comma. * It's intended to be used in operations macro. @@ -142,6 +169,12 @@ struct are_same : std::conjunction...> {}; template constexpr auto both_types_are_same = std::conjunction_v, are_same>; +/** + * A template constant to check if both types T1 and T2 match any type from Ts. + */ +template +constexpr auto both_types_are_any_of = std::conjunction_v, is_any>; + /** * A template constat to check if both types T1 and T2 don't match any type from Ts sequence. 
*/ diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index af57819827c9..feff53288cfd 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -593,11 +593,13 @@ def divide(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_divide(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() @@ -1578,11 +1580,13 @@ def subtract(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, 
copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: if x1_desc.dtype == x2_desc.dtype == dpnp.bool: raise TypeError("DPNP boolean subtract, the `-` operator, is not supported, " diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 15b853b3bfa2..2f754df996a8 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -37,6 +37,32 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_subtract(usm_type_x, usm_type_y): + x = dp.arange(50, usm_type = usm_type_x) + y = dp.arange(50, usm_type = usm_type_y) + + z = 20 - x - y - 7.4 + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) + + +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_divide(usm_type_x, usm_type_y): + x = dp.arange(120, usm_type = usm_type_x) + y = dp.arange(120, usm_type = usm_type_y) + + z = 2 / x / y / 1.5 + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) + + @pytest.mark.parametrize( "func, args", [ From a37dfdc799e886800173dd4de0dc7b40e122d303 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 13 Feb 2023 09:58:05 -0600 Subject: [PATCH 5/7] Use std::int32_t and std::int64_t types 
--- dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 148a0a9e646f..e345c6eefea7 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -112,7 +112,7 @@ MACRO_2ARG_3TYPES_OP(dpnp_add_c, input1_elem + input2_elem, x1 + x2, - MACRO_UNPACK_TYPES(int, long, bool), + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::add, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) @@ -133,7 +133,7 @@ MACRO_2ARG_3TYPES_OP(dpnp_copysign_c, MACRO_2ARG_3TYPES_OP(dpnp_divide_c, input1_elem / input2_elem, x1 / x2, - MACRO_UNPACK_TYPES(int, long, bool), + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::div, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) @@ -171,7 +171,7 @@ MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, input1_elem * input2_elem, x1 * x2, - MACRO_UNPACK_TYPES(int, long, bool), + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::mul, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) @@ -185,7 +185,7 @@ MACRO_2ARG_3TYPES_OP(dpnp_power_c, MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, input1_elem - input2_elem, x1 - x2, - MACRO_UNPACK_TYPES(int, long, bool), + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::sub, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) From 1c6517af0c7c59772a57a6e49e6304714db8ae46 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 14 Feb 2023 11:58:29 -0600 Subject: [PATCH 6/7] Disable floating-point optimizations that assume arguments and results are not NaNs or +-Inf --- dpnp/backend/CMakeLists.txt | 1 + utils/command_build_clib.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dpnp/backend/CMakeLists.txt 
b/dpnp/backend/CMakeLists.txt index f66aa4be1ae5..52e9cb21985b 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -93,6 +93,7 @@ string(CONCAT COMMON_COMPILE_FLAGS "-fsycl " "-fsycl-device-code-split=per_kernel " "-fno-approx-func " + "-fno-finite-math-only " ) string(CONCAT COMMON_LINK_FLAGS "-fsycl " diff --git a/utils/command_build_clib.py b/utils/command_build_clib.py index 95887cc65aaa..65673f1ae69a 100644 --- a/utils/command_build_clib.py +++ b/utils/command_build_clib.py @@ -63,7 +63,7 @@ # default variables (for Linux) _project_compiler = "icpx" _project_linker = "icpx" -_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func"] +_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func", "-fno-finite-math-only"] _project_cmplr_flag_sycl = ["-fsycl"] _project_cmplr_flag_stdcpp_static = [] # This brakes TBB ["-static-libstdc++", "-static-libgcc"] _project_cmplr_flag_compatibility = ["-Wl,--enable-new-dtags"] From 9cbf2bde1d329095e52288878ab2e5791864d0d6 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 16 Feb 2023 16:44:49 +0100 Subject: [PATCH 7/7] Fix issue with divide on Iris Xe --- dpnp/dpnp_algo/dpnp_algo.pyx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 54e71b87d767..3838ab5812c6 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -495,22 +495,24 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, # get FPTR function and result type cdef fptr_2in_1out_strides_t func = NULL + cdef DPNPFuncType return_type = DPNP_FT_NONE if fptr_name != DPNP_FN_DIVIDE_EXT or result_sycl_device.has_aspect_fp64: - result_type = dpnp_DPNPFuncType_to_dtype(< size_t > kernel_data.return_type) + return_type = kernel_data.return_type + func = < fptr_2in_1out_strides_t > kernel_data.ptr else: - result_type = dpnp_DPNPFuncType_to_dtype(< 
size_t > kernel_data.return_type_no_fp64) + return_type = kernel_data.return_type_no_fp64 func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64 if out is None: """ Create result array with type given by FPTR data """ result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, + return_type, None, device=result_sycl_device, usm_type=result_usm_type, sycl_queue=result_sycl_queue) else: + result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type) if out.dtype != result_type: utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type) if out.shape != result_shape: