From 8dd508879f3782c0fc64239090bdbf779ef72c83 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 3 Feb 2023 05:39:40 -0600 Subject: [PATCH 1/5] dpnp.add() doesn't work properly with a scalar --- .../include/dpnp_gen_2arg_3type_tbl.hpp | 93 ++++- dpnp/backend/include/dpnp_iface.hpp | 3 +- dpnp/backend/include/dpnp_iface_fptr.hpp | 4 +- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 351 ++++++++---------- dpnp/backend/src/dpnp_fptr.hpp | 49 +++ dpnp/dpnp_iface_mathematical.py | 82 ++-- tests/skipped_tests.tbl | 9 +- tests/skipped_tests_gpu.tbl | 37 +- tests/test_indexing.py | 55 +-- tests/test_mathematical.py | 148 ++++---- tests/test_strides.py | 67 ++-- .../cupy/math_tests/test_arithmetic.py | 2 +- 12 files changed, 466 insertions(+), 434 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 5d4ae22f796f..33f5e0d19a46 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -31,7 +31,10 @@ * Parameters: * - public name of the function and kernel name * - operation used to calculate the result + * - vector operation over SYCL group used to calculate the result + * - list of types vector operation accepts * - mkl operation used to calculate the result + * - list of types mkl operation accepts * */ @@ -41,11 +44,12 @@ #ifdef _SECTION_DOCUMENTATION_GENERATION_ -#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ +#define MACRO_2ARG_3TYPES_OP( \ + __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ /** @ingroup BACKEND_API */ \ /** @brief Per element operation function __name__ */ \ /** */ \ - /** Function "__name__" executes operator "__operation1__" over corresponding elements of input arrays */ \ + /** Function "__name__" executes operator "__operation__" over corresponding elements of input arrays */ \ /** */ \ /** @param[in] q_ref Reference to SYCL queue. */ \ /** @param[out] result_out Output array. 
*/ \ @@ -105,23 +109,84 @@ #endif -MACRO_2ARG_3TYPES_OP(dpnp_add_c, input1_elem + input2_elem, oneapi::mkl::vm::add) -MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c, sycl::atan2((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::atan2) +MACRO_2ARG_3TYPES_OP(dpnp_add_c, + input1_elem + input2_elem, + sycl::add_sat(x1, x2), + MACRO_UNPACK_TYPES(int, long), + oneapi::mkl::vm::add, + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) + +MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c, + sycl::atan2((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::atan2, + MACRO_UNPACK_TYPES(float, double)) + MACRO_2ARG_3TYPES_OP(dpnp_copysign_c, sycl::copysign((double)input1_elem, (double)input2_elem), - oneapi::mkl::vm::copysign) -MACRO_2ARG_3TYPES_OP(dpnp_divide_c, input1_elem / input2_elem, oneapi::mkl::vm::div) -MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, sycl::fmod((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::fmod) -MACRO_2ARG_3TYPES_OP(dpnp_hypot_c, sycl::hypot((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::hypot) -MACRO_2ARG_3TYPES_OP(dpnp_maximum_c, sycl::max(input1_elem, input2_elem), oneapi::mkl::vm::fmax) -MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, sycl::min(input1_elem, input2_elem), oneapi::mkl::vm::fmin) + nullptr, + std::false_type, + oneapi::mkl::vm::copysign, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_divide_c, + input1_elem / input2_elem, + nullptr, + std::false_type, + oneapi::mkl::vm::div, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, + sycl::fmod((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::fmod, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_hypot_c, + sycl::hypot((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::hypot, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_maximum_c, + sycl::max(input1_elem, input2_elem), + nullptr, + 
std::false_type, + oneapi::mkl::vm::fmax, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, + sycl::min(input1_elem, input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::fmin, + MACRO_UNPACK_TYPES(float, double)) // "multiply" needs to be standalone kernel (not autogenerated) due to complex algorithm. This is not an element wise. // pytest "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3" // requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10] -MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, input1_elem* input2_elem, oneapi::mkl::vm::mul) +MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, + input1_elem* input2_elem, + nullptr, + std::false_type, + oneapi::mkl::vm::mul, + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) + +MACRO_2ARG_3TYPES_OP(dpnp_power_c, + sycl::pow((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::pow, + MACRO_UNPACK_TYPES(float, double)) -MACRO_2ARG_3TYPES_OP(dpnp_power_c, sycl::pow((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::pow) -MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, input1_elem - input2_elem, oneapi::mkl::vm::sub) +MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, + input1_elem - input2_elem, + nullptr, + std::false_type, + oneapi::mkl::vm::sub, + MACRO_UNPACK_TYPES(float, double)) #undef MACRO_2ARG_3TYPES_OP diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 713e3e821979..7a80b40a3d2e 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -1829,7 +1829,8 @@ INP_DLLEXPORT void dpnp_invert_c(void* array1_in, void* result, size_t size); #include -#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ +#define MACRO_2ARG_3TYPES_OP( \ + __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ template \ INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ 
void* result_out, \ diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index f77a37aade89..61c1c9838ad6 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -394,13 +394,13 @@ enum class DPNPFuncName : size_t enum class DPNPFuncType : size_t { DPNP_FT_NONE, /**< Very first element of the enumeration */ + DPNP_FT_BOOL, /**< analog of numpy.bool_ or bool */ DPNP_FT_INT, /**< analog of numpy.int32 or int */ DPNP_FT_LONG, /**< analog of numpy.int64 or long */ DPNP_FT_FLOAT, /**< analog of numpy.float32 or float */ DPNP_FT_DOUBLE, /**< analog of numpy.float32 or double */ DPNP_FT_CMPLX64, /**< analog of numpy.complex64 or std::complex */ - DPNP_FT_CMPLX128, /**< analog of numpy.complex128 or std::complex */ - DPNP_FT_BOOL /**< analog of numpy.bool_ or bool */ + DPNP_FT_CMPLX128 /**< analog of numpy.complex128 or std::complex */ }; /** diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index eafa50d4cee2..1b90e4a6821e 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -825,7 +825,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) return; } -#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ + +#define MACRO_2ARG_3TYPES_OP( \ + __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ template \ @@ -834,6 +836,11 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) template \ + class __name__##_sg_kernel; \ + \ + template \ class __name__##_broadcast_kernel; \ \ template (q_ref)); \ \ - DPNPC_ptr_adapter<_DataType_input1> input1_ptr(q_ref, input1_in, input1_size); \ - DPNPC_ptr_adapter input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \ - DPNPC_ptr_adapter input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \ - DPNPC_ptr_adapter<_DataType_input2> input2_ptr(q_ref, input2_in, input2_size); \ - DPNPC_ptr_adapter input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \ - DPNPC_ptr_adapter input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \ - \ - DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result_out, result_size, false, true); \ - DPNPC_ptr_adapter result_shape_ptr(q_ref, result_shape, result_ndim); \ - DPNPC_ptr_adapter result_strides_ptr(q_ref, result_strides, result_ndim); \ - \ - _DataType_input1* input1_data = input1_ptr.get_ptr(); \ - shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr(); \ - shape_elem_type* input1_strides_data = input1_strides_ptr.get_ptr(); \ + _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast(input1_in)); \ + _DataType_input2* input2_data = static_cast<_DataType_input2 *>(const_cast(input2_in)); \ + _DataType_output* result = static_cast<_DataType_output *>(result_out); \ \ - _DataType_input2* input2_data = input2_ptr.get_ptr(); \ - shape_elem_type* input2_shape_data = input2_shape_ptr.get_ptr(); \ - shape_elem_type* input2_strides_data = 
input2_strides_ptr.get_ptr(); \ + bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \ \ - _DataType_output* result = result_ptr.get_ptr(); \ - shape_elem_type* result_shape_data = result_shape_ptr.get_ptr(); \ - shape_elem_type* result_strides_data = result_strides_ptr.get_ptr(); \ + shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \ \ - bool use_broadcasting = !array_equal(input1_shape_data, input1_ndim, input2_shape_data, input2_ndim); \ + get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \ + bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); \ + delete[] input1_shape_offsets; \ \ - const size_t input1_shape_size_in_bytes = input1_ndim * sizeof(shape_elem_type); \ - shape_elem_type* input1_shape_offsets = \ - reinterpret_cast(sycl::malloc_shared(input1_shape_size_in_bytes, q)); \ - get_shape_offsets_inkernel(input1_shape_data, input1_ndim, input1_shape_offsets); \ - bool use_strides = !array_equal(input1_strides_data, input1_ndim, input1_shape_offsets, input1_ndim); \ - sycl::free(input1_shape_offsets, q); \ + shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim]; \ \ - const size_t input2_shape_size_in_bytes = input2_ndim * sizeof(shape_elem_type); \ - shape_elem_type* input2_shape_offsets = \ - reinterpret_cast(sycl::malloc_shared(input2_shape_size_in_bytes, q)); \ - get_shape_offsets_inkernel(input2_shape_data, input2_ndim, input2_shape_offsets); \ + get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets); \ use_strides = \ - use_strides || !array_equal(input2_strides_data, input2_ndim, input2_shape_offsets, input2_ndim); \ - sycl::free(input2_shape_offsets, q); \ + use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); \ + delete[] input2_shape_offsets; \ \ sycl::event event; \ sycl::range<1> gws(result_size); \ @@ -924,25 +910,25 @@ static void 
func_map_init_elemwise_1arg_1type(func_map_t& fmap) input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ input1_it_size_in_bytes)); \ new (input1_it) \ - DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape_data, input1_strides_data, input1_ndim); \ + DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ \ - input1_it->broadcast_to_shape(result_shape_data, result_ndim); \ + input1_it->broadcast_to_shape(result_shape, result_ndim); \ \ DPNPC_id<_DataType_input2>* input2_it; \ const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); \ input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ input2_it_size_in_bytes)); \ new (input2_it) \ - DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape_data, input2_strides_data, input2_ndim); \ + DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ \ - input2_it->broadcast_to_shape(result_shape_data, result_ndim); \ + input2_it->broadcast_to_shape(result_shape, result_ndim); \ \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const _DataType_output input1_elem = (*input1_it)[i]; \ const _DataType_output input2_elem = (*input2_it)[i]; \ - result[i] = __operation1__; \ + result[i] = __operation__; \ } \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ @@ -951,8 +937,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) gws, kernel_parallel_for_func); \ }; \ \ - event = q.submit(kernel_func); \ - event.wait(); \ + q.submit(kernel_func).wait(); \ \ input1_it->~DPNPC_id(); \ input2_it->~DPNPC_id(); \ @@ -961,11 +946,42 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) } \ else if (use_strides) \ { \ + if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim)) \ + { \ + throw 
std::runtime_error("Result ndim=" + std::to_string(result_ndim) + \ + " mismatches with either input1 ndim=" + std::to_string(input1_ndim) + \ + " or input2 ndim=" + std::to_string(input2_ndim)); \ + } \ + \ + /* memory transfer optimization, use USM-host for temporary speeds up transfer to device */ \ + using usm_host_allocatorT = sycl::usm_allocator; \ + \ + size_t strides_size = 3 * result_ndim; \ + shape_elem_type *dev_strides_data = sycl::malloc_device(strides_size, q); \ + \ + /* create host temporary for packed strides managed by shared pointer */ \ + auto strides_host_packed = std::vector(strides_size, \ + usm_host_allocatorT(q)); \ + \ + /* packed vector is concatenation of result_strides, input1_strides and input2_strides, result_ndim elements each */ \ + std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ + std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ + std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim); \ + \ + auto copy_strides_ev = q.copy(strides_host_packed.data(), \ + dev_strides_data, \ + strides_host_packed.size()); \ + \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ + const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \ + const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim]; \ + \ size_t input1_id = 0; \ size_t input2_id = 0; \ + \ for (size_t i = 0; i < result_ndim; ++i) \ { \ const size_t output_xyz_id = \ @@ -976,33 +992,88 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) \ const _DataType_output input1_elem = input1_data[input1_id]; \ const _DataType_output input2_elem = input2_data[input2_id]; \ - result[output_id] 
= __operation1__; \ + result[output_id] = __operation__; \ } \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.depends_on(copy_strides_ev); \ cgh.parallel_for< \ class __name__##_strides_kernel<_DataType_output, _DataType_input1, _DataType_input2>>( \ gws, kernel_parallel_for_func); \ }; \ \ - event = q.submit(kernel_func); \ + q.submit(kernel_func).wait(); \ + \ + sycl::free(dev_strides_data, q); \ + return event_ref; \ } \ else \ { \ - if constexpr ((std::is_same<_DataType_input1, double>::value || \ - std::is_same<_DataType_input1, float>::value) && \ - std::is_same<_DataType_input2, _DataType_input1>::value) \ + if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __mkl_types__>) \ { \ - event = __operation2__(q, result_size, input1_data, input2_data, result); \ + event = __mkl_operation__(q, result_size, input1_data, input2_data, result); \ } \ - else \ + else if constexpr (none_of_both_types<_DataType_input1, _DataType_input2, \ + std::complex, std::complex>) \ + { \ + constexpr size_t lws = 64; \ + constexpr unsigned int vec_sz = 8; \ + constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space; \ + \ + auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \ + auto lws_range = sycl::range<1>(lws); \ + \ + auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ + auto sg = nd_it.get_sub_group(); \ + size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ + size_t end = start + static_cast(vec_sz); \ + \ + if (end < result_size) { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> x2 = \ + sg.load(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start])); \ + sycl::vec<_DataType_output, vec_sz> res_vec; \ + \ + if constexpr 
(both_types_are_same<_DataType_input1, _DataType_input2, __vec_types__>) \ + { \ + res_vec = __vec_operation__; \ + } \ + else \ + { \ + for (size_t k = 0; k < vec_sz; ++k) { \ + const _DataType_output input1_elem = x1[k]; \ + const _DataType_output input2_elem = x2[k]; \ + res_vec[k] = __operation__; \ + } \ + } \ + sg.store(sycl::multi_ptr<_DataType_output, global_space>(&result[start]), res_vec); \ + \ + } \ + else { \ + for (size_t k = start; k < result_size; ++k) { \ + const _DataType_output input1_elem = input1_data[k]; \ + const _DataType_output input2_elem = input2_data[k]; \ + result[k] = __operation__; \ + } \ + } \ + }; \ + \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for>(\ + sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ + }; \ + event = q.submit(kernel_func); \ + } \ + else /* either input1 or input2 has complex type */ \ { \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const _DataType_output input1_elem = input1_data[i]; \ const _DataType_output input2_elem = input2_data[i]; \ - result[i] = __operation1__; \ + result[i] = __operation__; \ } \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ @@ -1013,18 +1084,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) } \ } \ \ - input1_ptr.depends_on(event); \ - input1_shape_ptr.depends_on(event); \ - input1_strides_ptr.depends_on(event); \ - input2_ptr.depends_on(event); \ - input2_shape_ptr.depends_on(event); \ - input2_strides_ptr.depends_on(event); \ - result_ptr.depends_on(event); \ - result_shape_ptr.depends_on(event); \ - result_strides_ptr.depends_on(event); \ - \ event_ref = reinterpret_cast(&event); \ - \ return DPCTLEvent_Copy(event_ref); \ } \ \ @@ -1114,6 +1174,29 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) #include +template 
+static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_ADD_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_add_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_multiply_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); +} + +template +static void func_map_elemwise_2arg_3type_helper(func_map_t& fmap) +{ + ((func_map_elemwise_2arg_3type_core(fmap)), ...); +} + static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = {eft_INT, @@ -1149,39 +1232,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_ADD][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_add_c_default}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_INT] = {eft_INT, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_LNG] = {eft_LNG, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_FLT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_INT] = {eft_LNG, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_LNG] = {eft_LNG, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_FLT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_INT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_LNG] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_FLT] = {eft_FLT, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_DBL] = {eft_DBL, - 
(void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_INT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_LNG] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_FLT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_arctan2_c_default}; fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_LNG] = {eft_DBL, @@ -1725,111 +1775,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_C128] = { eft_C128, (void*)dpnp_multiply_c_default, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_BLN] = { - eft_BLN, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_INT] = { - eft_INT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_LNG] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, bool, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, bool, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_BLN] = { - eft_INT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_INT] = { - eft_INT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - 
fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, int32_t, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, int32_t, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_BLN] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, int64_t, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, int64_t, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_BLN] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_INT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_LNG] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, float, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, float, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_BLN] = { 
- eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, double, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, double, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_BLN] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, bool>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_INT] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, int32_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_LNG] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, int64_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_FLT] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, float>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_DBL] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, double>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_BLN] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, bool>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_INT] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, int32_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_LNG] = { - eft_C128, (void*)dpnp_multiply_c_ext, 
std::complex, int64_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_FLT] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, float>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_DBL] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, double>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C64] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_power_c_default}; fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_LNG] = {eft_LNG, @@ -1962,6 +1907,8 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void*)dpnp_subtract_c_ext}; + func_map_elemwise_2arg_3type_helper(fmap); + return; } diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 76116cafae71..4cb664858319 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -99,6 +99,55 @@ typedef func_type_map_factory_t, func_type_pair_t>, func_type_pair_t>> func_type_map_t; +/** + * Return an enum value of result type populated from input types. + */ +template +static constexpr DPNPFuncType populate_func_types() +{ + if constexpr (FT1 == DPNPFuncType::DPNP_FT_NONE) + { + throw std::runtime_error("Templated enum value of FT1 is None"); + } + else if constexpr (FT2 == DPNPFuncType::DPNP_FT_NONE) + { + throw std::runtime_error("Templated enum value of FT2 is None"); + } + return (FT1 < FT2) ? FT2 : FT1; +} + +/** + * Removes parentheses for a passed list of types separated by comma. + * It's intended to be used in operations macro. + */ +#define MACRO_UNPACK_TYPES(...) 
__VA_ARGS__ + +/** + * Implements std::is_same<> with variadic number of types to compare with + * and when type T has to match only one of types Ts. + */ +template +struct is_any : std::disjunction...> {}; + +/** + * Implements std::is_same<> with variadic number of types to compare with + * and when type T has to match every type from Ts sequence. + */ +template +struct are_same : std::conjunction...> {}; + +/** + * A template constant to check if both types T1 and T2 match every type from Ts sequence. + */ +template +constexpr auto both_types_are_same = std::conjunction_v, are_same>; + +/** + * A template constant to check if both types T1 and T2 don't match any type from Ts sequence. + */ +template +constexpr auto none_of_both_types = !std::disjunction_v, is_any>; + /** * FPTR interface initialization functions */ diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index ce9f340e8e45..8104fdea6735 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -154,56 +154,64 @@ def absolute(x1, **kwargs): return call_origin(numpy.absolute, x1, **kwargs) -def add(x1, x2, dtype=None, out=None, where=True, **kwargs): +def add(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Add arguments element-wise. For full documentation refer to :obj:`numpy.add`. + Returns + ------- + add : dpnp.ndarray + The sum of `x1` and `x2`, element-wise. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. 
- Otherwise the functions will be executed sequentially on CPU. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. Examples -------- - >>> import dpnp as np - >>> a = np.array([1, 2, 3]) - >>> b = np.array([1, 2, 3]) - >>> result = np.add(a, b) - >>> [x for x in result] + >>> import dpnp as dp + >>> a = dp.array([1, 2, 3]) + >>> b = dp.array([1, 2, 3]) + >>> result = dp.add(a, b) + >>> print(result) [2, 4, 6] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - return dpnp_add(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and 
x2_desc: + return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() return call_origin(numpy.add, x1, x2, dtype=dtype, out=out, where=where, **kwargs) @@ -1093,11 +1101,11 @@ def multiply(x1, ------- y : {dpnp.ndarray, scalar} The product of `x1` and `x2`, element-wise. - The result is a scalar if both x1 and x2 are scalars. Limitations ----------- - Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. Otherwise the functions will be executed sequentially on CPU. @@ -1122,8 +1130,8 @@ def multiply(x1, elif subok is not True: pass elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # keep the result in host memory, if both inputs are scalars - return x1 * x2 + # at least either x1 or x2 has to be an array + pass else: # get a common queue to copy data from the host into a device if any input is scalar queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 63c6cbd0d133..acd920580cf0 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -765,17 +765,12 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_547_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_549_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='mod', use_dtype=False}::test_binary 
tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp 
tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index c64c7fa45f99..f34ac97fe065 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -91,18 +91,7 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesInvalidValu tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_0_{shape=(3, 3)}::test_diag_indices_from tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_1_{shape=(0, 0)}::test_diag_indices_from tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_2_{shape=(2, 2, 2)}::test_diag_indices_from -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_383_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_439_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_447_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_463_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_519_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary + tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_all tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_axis tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_sum_all @@ -969,35 +958,17 @@ 
tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_279_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_287_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_359_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_367_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_383_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_439_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_447_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_463_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_519_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary 
tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input 
tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract + tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 091cf1345c4e..1a40777afac8 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -3,6 +3,9 @@ import dpnp import numpy +from numpy.testing import ( + assert_array_equal +) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -16,7 +19,7 @@ def test_choose(): expected = numpy.choose([0, 0, 0, 0], [a, b, c]) result = dpnp.choose([0, 0, 0, 0], [ia, ib, ic]) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("offset", @@ -47,7 +50,7 @@ def test_diagonal(array, offset): ia = dpnp.array(a) expected = numpy.diagonal(a, offset) result = dpnp.diagonal(ia, offset) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("val", @@ -71,7 +74,7 @@ def test_fill_diagonal(array, val): ia = dpnp.array(a) expected = numpy.fill_diagonal(a, val) result = dpnp.fill_diagonal(ia, val) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dimension", @@ -81,7 +84,7 @@ def test_fill_diagonal(array, val): def test_indices(dimension): expected = numpy.indices(dimension) result = dpnp.indices(dimension) - numpy.testing.assert_array_equal(expected, result) + 
assert_array_equal(expected, result) @pytest.mark.parametrize("array", @@ -107,7 +110,7 @@ def test_nonzero(array): ia = dpnp.array(array) expected = numpy.nonzero(a) result = dpnp.nonzero(ia) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -137,7 +140,7 @@ def test_place1(arr, mask, vals): im = dpnp.array(m) numpy.place(a, m, vals) dpnp.place(ia, im, vals) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -161,7 +164,7 @@ def test_place2(arr, mask, vals): im = dpnp.array(m) numpy.place(a, m, vals) dpnp.place(ia, im, vals) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -186,7 +189,7 @@ def test_place3(arr, mask, vals): im = dpnp.array(m) numpy.place(a, m, vals) dpnp.place(ia, im, vals) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("v", @@ -211,7 +214,7 @@ def test_put(array, ind, v): ia = dpnp.array(a) numpy.put(a, ind, v) dpnp.put(ia, ind, v) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("v", @@ -236,7 +239,7 @@ def test_put2(array, ind, v): ia = dpnp.array(a) numpy.put(a, ind, v) dpnp.put(ia, ind, v) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) def test_put3(): @@ -244,7 +247,7 @@ def test_put3(): ia = dpnp.array(a) dpnp.put(ia, [0, 2], [-44, -55]) numpy.put(a, [0, 2], [-44, -55]) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -256,7 +259,7 @@ def test_put_along_axis_val_int(): for axis in range(2): numpy.put_along_axis(a, ind_r, 777, axis) dpnp.put_along_axis(ai, ind_r_i, 777, axis) - numpy.testing.assert_array_equal(a, ai) + assert_array_equal(a, ai) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -268,7 
+271,7 @@ def test_put_along_axis1(): for axis in range(3): numpy.put_along_axis(a, ind_r, 777, axis) dpnp.put_along_axis(ai, ind_r_i, 777, axis) - numpy.testing.assert_array_equal(a, ai) + assert_array_equal(a, ai) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -280,7 +283,7 @@ def test_put_along_axis2(): for axis in range(3): numpy.put_along_axis(a, ind_r, [100, 200, 300, 400], axis) dpnp.put_along_axis(ai, ind_r_i, [100, 200, 300, 400], axis) - numpy.testing.assert_array_equal(a, ai) + assert_array_equal(a, ai) @pytest.mark.parametrize("vals", @@ -309,7 +312,7 @@ def test_putmask1(arr, mask, vals): iv = dpnp.array(v) numpy.putmask(a, m, v) dpnp.putmask(ia, im, iv) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("vals", @@ -334,7 +337,7 @@ def test_putmask2(arr, mask, vals): iv = dpnp.array(v) numpy.putmask(a, m, v) dpnp.putmask(ia, im, iv) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("vals", @@ -360,7 +363,7 @@ def test_putmask3(arr, mask, vals): iv = dpnp.array(v) numpy.putmask(a, m, v) dpnp.putmask(ia, im, iv) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) def test_select(): @@ -378,7 +381,7 @@ def test_select(): ichoicelist = [ichoice_val1, ichoice_val2] expected = numpy.select(condlist, choicelist) result = dpnp.select(icondlist, ichoicelist) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array_type", @@ -415,10 +418,9 @@ def test_take(array, indices, array_type, indices_type): iind = dpnp.array(ind) expected = numpy.take(a, ind) result = dpnp.take(ia, iind) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_take_along_axis(): a = numpy.arange(16).reshape(4, 4) ai = dpnp.array(a) @@ -427,10 +429,9 @@ def test_take_along_axis(): for axis in range(2): expected = 
numpy.take_along_axis(a, ind_r, axis) result = dpnp.take_along_axis(ai, ind_r_i, axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_take_along_axis1(): a = numpy.arange(64).reshape(4, 4, 4) ai = dpnp.array(a) @@ -439,7 +440,7 @@ def test_take_along_axis1(): for axis in range(3): expected = numpy.take_along_axis(a, ind_r, axis) result = dpnp.take_along_axis(ai, ind_r_i, axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("m", @@ -454,7 +455,7 @@ def test_take_along_axis1(): def test_tril_indices(n, k, m): result = dpnp.tril_indices(n, k, m) expected = numpy.tril_indices(n, k, m) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("k", @@ -472,7 +473,7 @@ def test_tril_indices_from(array, k): ia = dpnp.array(a) result = dpnp.tril_indices_from(ia, k) expected = numpy.tril_indices_from(a, k) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("m", @@ -487,7 +488,7 @@ def test_tril_indices_from(array, k): def test_triu_indices(n, k, m): result = dpnp.triu_indices(n, k, m) expected = numpy.triu_indices(n, k, m) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("k", @@ -505,4 +506,4 @@ def test_triu_indices_from(array, k): ia = dpnp.array(a) result = dpnp.triu_indices_from(ia, k) expected = numpy.triu_indices_from(a, k) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 21071bec41e9..70e0bd73dc50 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -1,8 +1,15 @@ import pytest +from .helper import get_all_dtypes import dpnp import numpy +from numpy.testing import ( 
+ assert_allclose, + assert_array_almost_equal, + assert_array_equal, + assert_raises +) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -10,27 +17,27 @@ class TestConvolve: def test_object(self): d = [1.] * 100 k = [1.] * 3 - numpy.testing.assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3)) + assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3)) def test_no_overwrite(self): d = dpnp.ones(100) k = dpnp.ones(3) dpnp.convolve(d, k) - numpy.testing.assert_array_equal(d, dpnp.ones(100)) - numpy.testing.assert_array_equal(k, dpnp.ones(3)) + assert_array_equal(d, dpnp.ones(100)) + assert_array_equal(k, dpnp.ones(3)) def test_mode(self): d = dpnp.ones(100) k = dpnp.ones(3) default_mode = dpnp.convolve(d, k, mode='full') full_mode = dpnp.convolve(d, k, mode='f') - numpy.testing.assert_array_equal(full_mode, default_mode) + assert_array_equal(full_mode, default_mode) # integer mode - with numpy.testing.assert_raises(ValueError): + with assert_raises(ValueError): dpnp.convolve(d, k, mode=-1) - numpy.testing.assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode) + assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode) # illegal arguments - with numpy.testing.assert_raises(TypeError): + with assert_raises(TypeError): dpnp.convolve(d, k, mode=None) @@ -53,33 +60,34 @@ def test_diff(array): dpnp_a = dpnp.array(array) expected = numpy.diff(np_a) result = dpnp.diff(dpnp_a) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) -@pytest.mark.parametrize("dtype1", - [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, numpy.complex128], - ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128']) -@pytest.mark.parametrize("dtype2", - [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, numpy.complex128], - ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 
'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128']) +@pytest.mark.parametrize("dtype1", get_all_dtypes()) +@pytest.mark.parametrize("dtype2", get_all_dtypes()) +@pytest.mark.parametrize("func", + ['add', 'multiply']) @pytest.mark.parametrize("data", [[[1, 2], [3, 4]]], ids=['[[1, 2], [3, 4]]']) -def test_multiply_dtype(dtype1, dtype2, data): +def test_op_multiple_dtypes(dtype1, func, dtype2, data): np_a = numpy.array(data, dtype=dtype1) dpnp_a = dpnp.array(data, dtype=dtype1) np_b = numpy.array(data, dtype=dtype2) dpnp_b = dpnp.array(data, dtype=dtype2) - result = dpnp.multiply(dpnp_a, dpnp_b) - expected = numpy.multiply(np_a, np_b) - numpy.testing.assert_array_equal(result, expected) + result = getattr(dpnp, func)(dpnp_a, dpnp_b) + expected = getattr(numpy, func)(np_a, np_b) + assert_array_equal(result, expected) @pytest.mark.parametrize("rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3]) -@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9], 5, 0.5]) -@pytest.mark.parametrize("dtype", [numpy.int32, numpy.int64, numpy.float32, numpy.float64]) +@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9]]) +# TODO: achieve the same level of dtype support for all mathematical operations, like +# @pytest.mark.parametrize("dtype", get_all_dtypes()) +# and to get rid of fallbacks on numpy allowed by below fixture +# @pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestMathematical: @staticmethod @@ -98,56 +106,68 @@ def _test_mathematical(self, name, dtype, lhs, rhs): b = self.array_or_scalar(numpy, rhs, dtype=dtype) expected = getattr(numpy, name)(a, b) - numpy.testing.assert_allclose(result, expected, atol=1e-4) + assert_allclose(result, expected, atol=1e-4) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_add(self, dtype, lhs, rhs): self._test_mathematical('add', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + 
@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_arctan2(self, dtype, lhs, rhs): self._test_mathematical('arctan2', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_copysign(self, dtype, lhs, rhs): self._test_mathematical('copysign', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_divide(self, dtype, lhs, rhs): self._test_mathematical('divide', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_fmod(self, dtype, lhs, rhs): self._test_mathematical('fmod', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_floor_divide(self, dtype, lhs, rhs): self._test_mathematical('floor_divide', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_hypot(self, dtype, lhs, rhs): self._test_mathematical('hypot', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_maximum(self, dtype, lhs, rhs): self._test_mathematical('maximum', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_minimum(self, dtype, lhs, rhs): self._test_mathematical('minimum', dtype, lhs, rhs) + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_multiply(self, dtype, lhs, rhs): self._test_mathematical('multiply', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + 
@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_remainder(self, dtype, lhs, rhs): self._test_mathematical('remainder', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_power(self, dtype, lhs, rhs): self._test_mathematical('power', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_subtract(self, dtype, lhs, rhs): self._test_mathematical('subtract', dtype, lhs, rhs) @@ -155,9 +175,9 @@ def test_subtract(self, dtype, lhs, rhs): @pytest.mark.parametrize("val_type", [bool, int, float], ids=['bool', 'int', 'float']) -@pytest.mark.parametrize("data_type", - [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) +@pytest.mark.parametrize("data_type", get_all_dtypes()) +@pytest.mark.parametrize("func", + ['add', 'multiply']) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -172,18 +192,18 @@ def test_subtract(self, dtype, lhs, rhs): '[[1, 2], [3, 4]]', '[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]', '[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]']) -def test_multiply_scalar(array, val, data_type, val_type): +def test_op_with_scalar(array, val, func, data_type, val_type): np_a = numpy.array(array, dtype=data_type) dpnp_a = dpnp.array(array, dtype=data_type) val_ = val_type(val) - result = dpnp.multiply(dpnp_a, val_) - expected = numpy.multiply(np_a, val_) - numpy.testing.assert_array_equal(result, expected) + result = getattr(dpnp, func)(dpnp_a, val_) + expected = getattr(numpy, func)(np_a, val_) + assert_array_equal(result, expected) - result = dpnp.multiply(val_, dpnp_a) - expected = numpy.multiply(val_, np_a) - numpy.testing.assert_array_equal(result, 
expected) + result = getattr(dpnp, func)(val_, dpnp_a) + expected = getattr(numpy, func)(val_, np_a) + assert_array_equal(result, expected) @pytest.mark.parametrize("shape", @@ -196,9 +216,9 @@ def test_multiply_scalar2(shape, dtype): np_a = numpy.ones(shape, dtype=dtype) dpnp_a = dpnp.ones(shape, dtype=dtype) - result = 0.5 * dpnp_a - expected = 0.5 * np_a - numpy.testing.assert_array_equal(result, expected) + result = 0.5 * dpnp_a * 1.7 + expected = 0.5 * np_a * 1.7 + assert_allclose(result, expected) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -211,7 +231,7 @@ def test_nancumprod(array): result = dpnp.nancumprod(dpnp_a) expected = numpy.nancumprod(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -224,31 +244,25 @@ def test_nancumsum(array): result = dpnp.nancumsum(dpnp_a) expected = numpy.nancumsum(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("data", [[[1., -1.], [0.1, -0.1]], [-2, -1, 0, 1, 2]], ids=['[[1., -1.], [0.1, -0.1]]', '[-2, -1, 0, 1, 2]']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_negative(data, dtype): np_a = numpy.array(data, dtype=dtype) dpnp_a = dpnp.array(data, dtype=dtype) result = dpnp.negative(dpnp_a) expected = numpy.negative(np_a) - numpy.testing.assert_array_equal(result, expected) + assert_array_equal(result, expected) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("val_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) -@pytest.mark.parametrize("data_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - 
ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) +@pytest.mark.parametrize("val_type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +@pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -269,12 +283,11 @@ def test_power(array, val, data_type, val_type): val_ = val_type(val) result = dpnp.power(dpnp_a, val_) expected = numpy.power(np_a, val_) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) class TestEdiff1d: - @pytest.mark.parametrize("data_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) + @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("array", [[1, 2, 4, 7, 0], [], [1], @@ -285,7 +298,7 @@ def test_ediff1d_int(self, array, data_type): result = dpnp.ediff1d(dpnp_a) expected = numpy.ediff1d(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -297,13 +310,12 @@ def test_ediff1d_args(self): result = dpnp.ediff1d(np_a, to_end=to_end, to_begin=to_begin) expected = numpy.ediff1d(np_a, to_end=to_end, to_begin=to_begin) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestTrapz: - @pytest.mark.parametrize("data_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) + @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("array", [[1, 2, 3], [[1, 2, 3], [4, 5, 6]], [1, 4, 6, 9, 10, 12], @@ -315,12 +327,10 @@ def test_trapz_default(self, array, data_type): result = dpnp.trapz(dpnp_a) expected = numpy.trapz(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) - @pytest.mark.parametrize("data_type_y", - 
[numpy.float64, numpy.float32, numpy.int64, numpy.int32]) - @pytest.mark.parametrize("data_type_x", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) + @pytest.mark.parametrize("data_type_y", get_all_dtypes(no_bool=True, no_complex=True)) + @pytest.mark.parametrize("data_type_x", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("y_array", [[1, 2, 4, 5], [1., 2.5, 6., 7.]]) @pytest.mark.parametrize("x_array", [[2, 5, 6, 9]]) @@ -333,7 +343,7 @@ def test_trapz_with_x_params(self, y_array, x_array, data_type_y, data_type_x): result = dpnp.trapz(dpnp_y, dpnp_x) expected = numpy.trapz(np_y, np_x) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array", [[1, 2, 3], [4, 5, 6]]) def test_trapz_with_x_param_2ndim(self, array): @@ -342,7 +352,7 @@ def test_trapz_with_x_param_2ndim(self, array): result = dpnp.trapz(dpnp_a, dpnp_a) expected = numpy.trapz(np_a, np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("y_array", [[1, 2, 4, 5], [1., 2.5, 6., 7., ]]) @@ -353,7 +363,7 @@ def test_trapz_with_dx_params(self, y_array, dx): result = dpnp.trapz(dpnp_y, dx=dx) expected = numpy.trapz(np_y, dx=dx) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -388,7 +398,7 @@ def test_cross_3x3(self, x1, x2, axisa, axisb, axisc, axis): result = dpnp.cross(dpnp_x1, dpnp_x2, axisa, axisb, axisc, axis) expected = numpy.cross(np_x1, np_x2, axisa, axisb, axisc, axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -403,7 +413,7 @@ def test_gradient_y1(self, array): result = dpnp.gradient(dpnp_y) expected = numpy.gradient(np_y) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) 
@pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9], [3., 4., 7.5, 9.], @@ -415,7 +425,7 @@ def test_gradient_y1_dx(self, array, dx): result = dpnp.gradient(dpnp_y, dx) expected = numpy.gradient(np_y, dx) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) class TestCeil: @@ -433,7 +443,7 @@ def test_ceil(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.ceil(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -473,7 +483,7 @@ def test_floor(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.floor(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -513,7 +523,7 @@ def test_trunc(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.trunc(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -556,7 +566,7 @@ def test_power(self): np_array2 = numpy.array(array2_data, dtype=numpy.float64) expected = numpy.power(np_array1, np_array2, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], diff --git a/tests/test_strides.py b/tests/test_strides.py index 7ec1d6b3f03f..3c0d86a44a5a 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -1,8 +1,13 @@ import math import pytest +from .helper import get_all_dtypes import dpnp + import numpy +from numpy.testing import ( + assert_allclose +) def _getattr(ex, str_): @@ -15,12 +20,10 @@ def _getattr(ex, str_): @pytest.mark.parametrize("func_name", ['abs', ]) 
-@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) -def test_strides(func_name, type): +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strides(func_name, dtype): shape = (4, 4) - a = numpy.arange(shape[0] * shape[1], dtype=type).reshape(shape) + a = numpy.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) a_strides = a[0::2, 0::2] dpa = dpnp.array(a) dpa_strides = dpa[0::2, 0::2] @@ -31,7 +34,7 @@ def test_strides(func_name, type): numpy_func = _getattr(numpy, func_name) expected = numpy_func(a_strides) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.parametrize("func_name", @@ -39,9 +42,7 @@ def test_strides(func_name, type): "cosh", "conjugate", "degrees", "ediff1d", "exp", "exp2", "expm1", "fabs", "floor", "log", "log10", "log1p", "log2", "negative", "radians", "sign", "sin", "sinh", "sqrt", "square", "tanh", "trunc"]) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -58,12 +59,10 @@ def test_strides_1arg(func_name, dtype, shape): numpy_func = _getattr(numpy, func_name) expected = numpy_func(b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -80,12 +79,10 @@ def test_strides_erf(dtype, shape): for idx, val in enumerate(b): expected[idx] = math.erf(val) - numpy.testing.assert_allclose(result, expected) + 
assert_allclose(result, expected) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -101,12 +98,10 @@ def test_strides_reciprocal(dtype, shape): result = dpnp.reciprocal(dpb) expected = numpy.reciprocal(b) - numpy.testing.assert_allclose(result, expected, rtol=1e-06) + assert_allclose(result, expected, rtol=1e-06) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -120,14 +115,12 @@ def test_strides_tan(dtype, shape): result = dpnp.tan(dpb) expected = numpy.tan(b) - numpy.testing.assert_allclose(result, expected, rtol=1e-06) + assert_allclose(result, expected, rtol=1e-06) @pytest.mark.parametrize("func_name", ["add", "arctan2", "hypot", "maximum", "minimum", "multiply", "power", "subtract"]) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -144,7 +137,7 @@ def test_strides_2args(func_name, dtype, shape): numpy_func = _getattr(numpy, func_name) expected = numpy_func(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) @pytest.mark.parametrize("func_name", @@ -168,12 +161,10 @@ def test_strides_bitwise(func_name, dtype, shape): numpy_func = _getattr(numpy, func_name) expected = numpy_func(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.parametrize("dtype", - [numpy.float64, 
numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -187,13 +178,10 @@ def test_strides_copysign(dtype, shape): result = dpnp.copysign(dpa, dpb) expected = numpy.copysign(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -207,13 +195,10 @@ def test_strides_fmod(dtype, shape): result = dpnp.fmod(dpa, dpb) expected = numpy.fmod(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -227,4 +212,4 @@ def test_strides_true_devide(dtype, shape): result = dpnp.fmod(dpa, dpb) expected = numpy.fmod(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 158f5cc14421..a53a8494707c 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -146,7 +146,7 @@ def check_binary(self, xp): y = y.astype(numpy.complex64) # NumPy returns an output array of another type than DPNP when input ones have diffrent types. 
- if self.name == 'multiply' and xp is cupy: + if self.name in ('add', 'multiply') and xp is cupy: if xp.isscalar(arg1) and xp.isscalar(arg2): # If both are scalars, the result will be a scalar, so needs to convert into numpy-scalar. y = numpy.asarray(y) From 430eca37f2958fb03ba8a6033d48703d129be746 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 9 Feb 2023 02:19:23 -0600 Subject: [PATCH 2/5] get rid of dpctl.SyclQueue() call in tests with unsupported device keyword --- dpnp/dpnp_iface_mathematical.py | 2 +- tests/skipped_tests.tbl | 4 ++-- tests/skipped_tests_gpu.tbl | 36 +++++++++++++++++++++++++++++---- tests/test_sycl_queue.py | 2 +- 4 files changed, 36 insertions(+), 8 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 8104fdea6735..786bbe006803 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -170,7 +170,7 @@ def add(x1, Returns ------- - add : dpnp.ndarray + y : dpnp.ndarray The sum of `x1` and `x2`, element-wise. 
Limitations diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index acd920580cf0..96e44cca594d 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -765,12 +765,12 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_547_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_549_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf - +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input - +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp 
tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index f34ac97fe065..af2dbd783a4e 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -91,7 +91,18 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesInvalidValu tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_0_{shape=(3, 3)}::test_diag_indices_from tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_1_{shape=(0, 0)}::test_diag_indices_from tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_2_{shape=(2, 2, 2)}::test_diag_indices_from - +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_383_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_439_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary 
+tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_447_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_463_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_519_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_all tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_axis tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_sum_all @@ -958,17 +969,34 @@ 
tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 - +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_279_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_287_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_359_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_367_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary 
+tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_383_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_439_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_447_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_463_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_519_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary 
tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary - +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input +tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input - 
+tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input +tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 413596e2cc76..bc42f70b3700 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -426,7 +426,7 @@ def test_random_state(func, args, kwargs, device, usm_type): assert device == res_array.sycl_device assert usm_type == res_array.usm_type - sycl_queue = dpctl.SyclQueue(device=device, property="in_order") + sycl_queue = dpctl.SyclQueue(device, property="in_order") # test with in-order SYCL queue per a device and passed as argument rs = dpnp.random.RandomState((147, 56, 896), sycl_queue=sycl_queue) From a7539e6c526da1e9b2368b21e8b84becb957afa6 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 10 Feb 2023 06:55:46 -0600 Subject: [PATCH 3/5] Add a fix for crash on CPU device --- .github/workflows/conda-package.yml | 1 + dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 26 ++++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index ff25e456436d..da29bf31dd09 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -17,6 +17,7 @@ env: test_linalg.py test_mathematical.py test_random_state.py + test_special.py test_usm_type.py VER_JSON_NAME: 'version.json' VER_SCRIPT1: "import json; f = open('version.json', 'r'); j = json.load(f); f.close(); " diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp 
index 1b90e4a6821e..4ce5ad01e9c6 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1027,22 +1027,23 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) auto sg = nd_it.get_sub_group(); \ size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ - size_t end = start + static_cast(vec_sz); \ + size_t end = start + static_cast(vec_sz) * sg.get_max_local_range()[0] - 1; \ \ - if (end < result_size) { \ + if (end < result_size) \ + { \ sycl::vec<_DataType_input1, vec_sz> x1 = \ sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ sycl::vec<_DataType_input2, vec_sz> x2 = \ sg.load(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start])); \ sycl::vec<_DataType_output, vec_sz> res_vec; \ - \ if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __vec_types__>) \ { \ res_vec = __vec_operation__; \ } \ else \ { \ - for (size_t k = 0; k < vec_sz; ++k) { \ + for (size_t k = 0; k < vec_sz; ++k) \ + { \ const _DataType_output input1_elem = x1[k]; \ const _DataType_output input2_elem = x2[k]; \ res_vec[k] = __operation__; \ @@ -1051,8 +1052,10 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) sg.store(sycl::multi_ptr<_DataType_output, global_space>(&result[start]), res_vec); \ \ } \ - else { \ - for (size_t k = start; k < result_size; ++k) { \ + else \ + { \ + for (size_t k = start; k < result_size; ++k) \ + { \ const _DataType_output input1_elem = input1_data[k]; \ const _DataType_output input2_elem = input2_data[k]; \ result[k] = __operation__; \ @@ -1061,6 +1064,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) }; \ \ auto kernel_func = [&](sycl::handler& cgh) { \ + sycl::stream out(65536, 128, cgh);\ cgh.parallel_for>(\ sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ }; \ @@ -1070,11 +1074,11 @@ static void 
func_map_init_elemwise_1arg_1type(func_map_t& fmap) { \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ - { \ - const _DataType_output input1_elem = input1_data[i]; \ - const _DataType_output input2_elem = input2_data[i]; \ - result[i] = __operation__; \ - } \ + \ + const _DataType_output input1_elem = input1_data[i]; \ + const _DataType_output input2_elem = input2_data[i]; \ + result[i] = __operation__; \ + \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ cgh.parallel_for>( \ From e8edc9a9ea9db485323bca4b58d125d7eec32202 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 10 Feb 2023 08:56:24 -0600 Subject: [PATCH 4/5] USM type in operations with a scalar --- dpnp/dpnp_iface.py | 5 +- dpnp/dpnp_iface_logic.py | 90 +++++++++++++++++------------ dpnp/dpnp_iface_mathematical.py | 20 ++++--- dpnp/dpnp_utils/dpnp_algo_utils.pyx | 49 +++++++++++----- tests/test_usm_type.py | 42 ++++++++++---- 5 files changed, 134 insertions(+), 72 deletions(-) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 4806b511aff4..1c60d1c999e1 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -225,6 +225,7 @@ def default_float_type(device=None, sycl_queue=None): def get_dpnp_descriptor(ext_obj, copy_when_strides=True, copy_when_nondefault_queue=True, + alloc_usm_type=None, alloc_queue=None): """ Return True: @@ -245,9 +246,9 @@ def get_dpnp_descriptor(ext_obj, return False # If input object is a scalar, it means it was allocated on host memory. - # We need to copy it to device memory according to compute follows data paradigm. + # We need to copy it to USM memory according to compute follows data paradigm. 
if isscalar(ext_obj): - ext_obj = array(ext_obj, sycl_queue=alloc_queue) + ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue) # while dpnp functions have no implementation with strides support # we need to create a non-strided copy diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index de7537a42878..e94b0f6c1efb 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -277,11 +277,13 @@ def equal(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.equal, x1, x2) @@ -345,11 +347,13 @@ def greater(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) 
else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_greater(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.greater, x1, x2) @@ -413,11 +417,13 @@ def greater_equal(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.greater_equal, x1, x2) @@ -659,11 +665,13 @@ def less(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = 
get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_less(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.less, x1, x2) @@ -727,11 +735,13 @@ def less_equal(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_less_equal(x1_desc, 
x2_desc).get_pyobj() return call_origin(numpy.less_equal, x1, x2) @@ -794,11 +804,13 @@ def logical_and(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_logical_and(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.logical_and, x1, x2) @@ -918,11 +930,13 @@ def logical_or(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, 
copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_logical_or(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.logical_or, x1, x2) @@ -985,11 +999,13 @@ def logical_xor(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.logical_xor, x1, x2) @@ -1053,11 +1069,13 @@ def not_equal(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = 
dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_not_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.not_equal, x1, x2) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 786bbe006803..e254e916b846 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -205,11 +205,13 @@ def add(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() @@ -1133,11 +1135,13 @@ def multiply(x1, # at least either 
x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index 4913d5854918..abdc4107f649 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -34,7 +34,7 @@ This module contains differnt helpers and utilities import numpy import dpctl -import dpctl.tensor as dpt +import dpctl.utils as dpu import dpnp.config as config import dpnp.dpnp_container as dpnp_container @@ -70,7 +70,7 @@ __all__ = [ "dpnp_descriptor", "get_axis_indeces", "get_axis_offsets", - "get_common_allocation_queue", + "get_usm_allocations", "_get_linear_index", "map_dtype_to_device", "normalize_axis", @@ -163,7 +163,7 @@ def call_origin(function, *args, **kwargs): kwargx = convert_item(kwarg) kwargs_new[key] = kwargx - exec_q = dpctl.utils.get_execution_queue(alloc_queues) + exec_q = dpu.get_execution_queue(alloc_queues) if exec_q is None: exec_q = 
dpnp.get_normalized_queue_device(sycl_queue=sycl_queue) # print(f"DPNP call_origin(): bakend called. \n\t function={function}, \n\t args_new={args_new}, \n\t kwargs_new={kwargs_new}, \n\t dpnp_inplace={dpnp_inplace}") @@ -220,30 +220,49 @@ def unwrap_array(x1): return x1 -def get_common_allocation_queue(objects): - """ - Given a list of objects returns the queue which can be used for a memory allocation - to follow compute follows data paradigm, or returns `None` if the default queue can be used. - An exception will be raised, if the paradigm is broked for the given list of objects. - """ - if not isinstance(objects, (list, tuple)): - raise TypeError("Expected a list or a tuple, got {}".format(type(objects))) - - if len(objects) == 0: +def _get_coerced_usm_type(objects): + types_in_use = [obj.usm_type for obj in objects if hasattr(obj, "usm_type")] + if len(types_in_use) == 0: return None + elif len(types_in_use) == 1: + return types_in_use[0] + + common_usm_type = dpu.get_coerced_usm_type(types_in_use) + if common_usm_type is None: + raise ValueError("Input arrays must have coerced USM types") + return common_usm_type + +def _get_common_allocation_queue(objects): queues_in_use = [obj.sycl_queue for obj in objects if hasattr(obj, "sycl_queue")] if len(queues_in_use) == 0: return None elif len(queues_in_use) == 1: return queues_in_use[0] - common_queue = dpt.get_execution_queue(queues_in_use) + common_queue = dpu.get_execution_queue(queues_in_use) if common_queue is None: raise ValueError("Input arrays must be allocated on the same SYCL queue") return common_queue +def get_usm_allocations(objects): + """ + Given a list of objects returns a tuple of USM type and SYCL queue + which can be used for a memory allocation and to follow compute follows data paradigm, + or returns `(None, None)` if the default USM type and SYCL queue can be used. + An exception will be raised, if the paradigm is broken for the given list of objects.
+ + """ + + if not isinstance(objects, (list, tuple)): + raise TypeError("Expected a list or a tuple, got {}".format(type(objects))) + + if len(objects) == 0: + return (None, None) + return (_get_coerced_usm_type(objects), _get_common_allocation_queue(objects)) + + def map_dtype_to_device(dtype, device): """ Map an input ``dtype`` with type ``device`` may use @@ -631,7 +650,7 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2): "could not recognize common USM type for inputs of USM types {} and {}" "".format(array1_obj.usm_type, array2_obj.usm_type)) - common_sycl_queue = dpctl.utils.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue)) + common_sycl_queue = dpu.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue)) # TODO: refactor, remove when CFD is implemented in all array constructors if common_sycl_queue is None and array1_obj.sycl_context == array2_obj.sycl_context: common_sycl_queue = array1_obj.sycl_queue diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 094fe419c263..15b853b3bfa2 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -11,16 +11,17 @@ ] -@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) -def test_coerced_usm_types_sum(usm_type): - x = dp.arange(10, usm_type = "device") - y = dp.arange(10, usm_type = usm_type) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_sum(usm_type_x, usm_type_y): + x = dp.arange(1000, usm_type = usm_type_x) + y = dp.arange(1000, usm_type = usm_type_y) - z = x + y - - assert z.usm_type == x.usm_type - assert z.usm_type == "device" - assert y.usm_type == usm_type + z = 1.3 + x + y + 2 + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) @pytest.mark.parametrize("usm_type_x", 
list_of_usm_types, ids=list_of_usm_types) @@ -29,8 +30,8 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): x = dp.arange(10, usm_type = usm_type_x) y = dp.arange(10, usm_type = usm_type_y) - z = x * y - + z = 3 * x * y * 1.5 + assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) @@ -61,3 +62,22 @@ def test_array_creation(func, args, usm_type_x, usm_type_y): assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y + +@pytest.mark.parametrize("op", + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal']) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_logic_op(op, usm_type_x, usm_type_y): + x = dp.arange(100, usm_type = usm_type_x) + y = dp.arange(100, usm_type = usm_type_y)[::-1] + + z = getattr(dp, op)(x, y) + zx = getattr(dp, op)(x, 50) + zy = getattr(dp, op)(30, y) + + assert x.usm_type == zx.usm_type == usm_type_x + assert y.usm_type == zy.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) From 2f68eb18e4cea16b79d0afd224a075ad29dc32f4 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 10 Feb 2023 10:01:47 -0600 Subject: [PATCH 5/5] Porting fix for crash to logic kernel --- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 8 ++++---- dpnp/backend/kernels/dpnp_krnl_logic.cpp | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 4ce5ad01e9c6..32097d321a71 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ 
-1025,11 +1025,11 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) \ auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ auto sg = nd_it.get_sub_group(); \ - size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ - sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ - size_t end = start + static_cast<size_t>(vec_sz) * sg.get_max_local_range()[0] - 1; \ + const auto max_sg_size = sg.get_max_local_range()[0]; \ + const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * max_sg_size); \ \ - if (end < result_size) \ + if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size) \ { \ sycl::vec<_DataType_input1, vec_sz> x1 = \ sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index be1bb1bab79b..157347aa90c0 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -403,11 +403,11 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, \ auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ auto sg = nd_it.get_sub_group(); \ - size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ - sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ - size_t end = start + static_cast<size_t>(vec_sz); \ + const auto max_sg_size = sg.get_max_local_range()[0]; \ + const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * max_sg_size); \ \ - if (end < result_size) { \ + if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size) { \ sycl::vec<_DataType_input1, vec_sz> x1 = \ sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ sycl::vec<bool, vec_sz> res_vec; \ @@ -647,11 +647,11 @@ static void func_map_logic_1arg_1type_helper(func_map_t& fmap) \ auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ auto sg =
nd_it.get_sub_group(); \ - size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ - sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ - size_t end = start + static_cast<size_t>(vec_sz); \ + const auto max_sg_size = sg.get_max_local_range()[0]; \ + const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * max_sg_size); \ \ - if (end < result_size) { \ + if (start + static_cast<size_t>(vec_sz) * max_sg_size < result_size) { \ sycl::vec<_DataType_input1, vec_sz> x1 = \ sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ sycl::vec<_DataType_input2, vec_sz> x2 = \