diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 17140158deee..b5edc3985308 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -2,4 +2,5 @@ - [ ] Have you added a test, reproducer or referred to issue with a reproducer? - [ ] Have you tested your changes locally for CPU and GPU devices? - [ ] Have you made sure that new changes do not introduce compiler warnings? +- [ ] Have you checked performance impact of proposed changes? - [ ] If this PR is a work in progress, are you filing the PR as a draft? diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 0964f18df81e..9cd41f11cb19 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -138,9 +138,9 @@ MACRO_2ARG_3TYPES_OP(dpnp_divide_c, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, - sycl::fmod((double)input1_elem, (double)input2_elem), - nullptr, - std::false_type, + dispatch_fmod_op(input1_elem, input2_elem), + x1 % x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::fmod, MACRO_UNPACK_TYPES(float, double)) diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index f3d8a4a95ccd..6264d5d7146d 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -281,8 +281,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ + const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 741a945fb099..2b3417ee0011 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -111,7 +111,7 @@ size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ \ size_t input_id = 0; \ for (size_t i = 0; i < input1_ndim; ++i) \ @@ -635,7 +635,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap) size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ \ size_t input_id = 0; \ for (size_t i = 0; i < input1_ndim; ++i) \ @@ -848,6 +848,18 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) return; } +template +constexpr auto dispatch_fmod_op(T elem1, T elem2) +{ + if constexpr (is_any_v) + { + return elem1 % elem2; + } + else + { + return sycl::fmod(elem1, elem2); + } +} #define MACRO_2ARG_3TYPES_OP( \ __name__, __operation__, __vec_operation__, __vec_types__, 
__mkl_operation__, __mkl_types__) \ @@ -995,8 +1007,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ + const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ @@ -1261,6 +1273,16 @@ static constexpr DPNPFuncType get_divide_res_type() return widest_type; } +template +static constexpr DPNPFuncType get_fmod_res_type() +{ + if constexpr ((FT1 == DPNPFuncType::DPNP_FT_BOOL) && (FT2 == DPNPFuncType::DPNP_FT_BOOL)) + { + return DPNPFuncType::DPNP_FT_INT; + } + return populate_func_types(); +} + template static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) { @@ -1300,12 +1322,29 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) ...); } +template +static void func_map_elemwise_2arg_3type_core_no_complex(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][FT1][FTs] = + {get_fmod_res_type(), + (void*)dpnp_fmod_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); +} + template static void func_map_elemwise_2arg_3type_helper(func_map_t& fmap) { ((func_map_elemwise_2arg_3type_core(fmap)), ...); } +template +static void func_map_elemwise_2arg_3type_helper_no_complex(func_map_t& fmap) +{ + ((func_map_elemwise_2arg_3type_core_no_complex(fmap)), ...); +} + static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = {eft_INT, @@ -1539,39 +1578,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_FMOD][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_fmod_c_default}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_INT] = {eft_INT, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_LNG] = {eft_LNG, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_FLT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_DBL] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_INT] = {eft_LNG, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_LNG] = {eft_LNG, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_FLT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_DBL] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_INT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_LNG] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_FLT] = {eft_FLT, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_DBL] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_INT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_LNG] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_FLT] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_DBL] = {eft_DBL, - (void*)dpnp_fmod_c_ext}; - 
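The `dpnp_fmod_c` kernel above now routes through `dispatch_fmod_op`, which applies the integer `%` operator for `bool`, `std::int32_t` and `std::int64_t` element types and falls back to `sycl::fmod` otherwise, with `bool`/`bool` results promoted to `int` via `get_fmod_res_type`. Below is a minimal pure-Python sketch of that dispatch rule; the function name and the use of `math.fmod` are illustrative only and are not part of the dpnp backend.

```python
import math

def dispatch_fmod(elem1, elem2):
    """Pure-Python sketch of dispatch_fmod_op: integral inputs take the
    C-style '%' branch, everything else goes through a floating fmod."""
    if isinstance(elem1, int) and isinstance(elem2, int):   # bool is a subclass of int
        # C++ '%' truncates toward zero, so emulate it with math.fmod rather
        # than Python's floor-based '%' operator
        return int(math.fmod(elem1, elem2))
    return math.fmod(elem1, elem2)

print(dispatch_fmod(7, 3))       # 1
print(dispatch_fmod(-7, 3))      # -1 (sign follows the dividend, as with C++ '%')
print(dispatch_fmod(-4.5, 2.0))  # -0.5
```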
fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_hypot_c_default}; fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_LNG] = {eft_DBL, @@ -1918,6 +1924,7 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) eft_DBL, (void*)dpnp_subtract_c_default}; func_map_elemwise_2arg_3type_helper(fmap); + func_map_elemwise_2arg_3type_helper_no_complex(fmap); return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index d1a6767c2adc..78a9a29e99e7 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -396,7 +396,7 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \ \ size_t input1_id = 0; \ \ @@ -635,8 +635,8 @@ static void func_map_logic_1arg_1type_helper(func_map_t& fmap) const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type *input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \ + const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ diff --git a/dpnp/backend/kernels/dpnp_krnl_searching.cpp b/dpnp/backend/kernels/dpnp_krnl_searching.cpp index fef5f78d15da..471d524643f5 100644 --- a/dpnp/backend/kernels/dpnp_krnl_searching.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_searching.cpp @@ -294,9 +294,9 @@ DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref, const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ { const shape_elem_type* result_strides_data = &dev_strides_data[0]; - const shape_elem_type* condition_strides_data = &dev_strides_data[1]; - const shape_elem_type* input1_strides_data = &dev_strides_data[2]; - const shape_elem_type* input2_strides_data = &dev_strides_data[3]; + const shape_elem_type* condition_strides_data = &dev_strides_data[result_ndim]; + const shape_elem_type* input1_strides_data = &dev_strides_data[2 * result_ndim]; + const shape_elem_type* input2_strides_data = &dev_strides_data[3 * result_ndim]; size_t condition_id = 0; size_t input1_id = 0; diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 2fa9de34b998..923454142f43 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -505,8 +505,23 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, return_type = kernel_data.return_type_no_fp64 func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64 - if out is None: - """ Create result array with type given by FPTR data """ + # check 'out' parameter data + if out is not None: + if out.shape != result_shape: + utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape) + + utils.get_common_usm_allocation(x1_obj, out) # check USM allocation is common + + if out is None or out.is_array_overlapped(x1_obj) or out.is_array_overlapped(x2_obj) or not out.match_ctype(return_type): + """ + Create result array with type given by FPTR data. 
+ If 'out' array has another dtype than expected or overlaps a memory from any input array, + we have to create a temporary array and to copy data from the temporary into 'out' array, + once the computation is completed. + Otherwise simultaneously access to the same memory may cause a race condition issue + which will result into undefined behaviour. + """ + is_result_memory_allocated = True result = utils.create_output_descriptor(result_shape, return_type, None, @@ -514,16 +529,9 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, usm_type=result_usm_type, sycl_queue=result_sycl_queue) else: - result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type) - if out.dtype != result_type: - utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type) - if out.shape != result_shape: - utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape) - + is_result_memory_allocated = False result = out - utils.get_common_usm_allocation(x1_obj, result) # check USM allocation is common - cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) result_obj = result.get_array() @@ -554,4 +562,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) - return result + if out is not None and is_result_memory_allocated: + return out.get_result_desc(result) + + return result.get_result_desc() diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index f2ccf56ef76b..5741ea0fa145 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -29,6 +29,23 @@ import dpnp + +def _get_unwrapped_index_key(key): + """ + Return a key where each nested instance of DPNP array is unwrapped into USM ndarray + for futher processing in DPCTL advanced indexing functions. + + """ + + if isinstance(key, tuple): + if any(isinstance(x, dpnp_array) for x in key): + # create a new tuple from the input key with unwrapped DPNP arrays + return tuple(x.get_array() if isinstance(x, dpnp_array) else x for x in key) + elif isinstance(key, dpnp_array): + return key.get_array() + return key + + class dpnp_array: """ Multi-dimensional array object. @@ -176,8 +193,7 @@ def __ge__(self, other): # '__getattribute__', def __getitem__(self, key): - if isinstance(key, dpnp_array): - key = key.get_array() + key = _get_unwrapped_index_key(key) item = self._array_obj.__getitem__(key) if not isinstance(item, dpt.usm_ndarray): @@ -194,7 +210,10 @@ def __gt__(self, other): return dpnp.greater(self, other) # '__hash__', - # '__iadd__', + + def __iadd__(self, other): + dpnp.add(self, other, out=self) + return self def __iand__(self, other): dpnp.bitwise_and(self, other, out=self) @@ -208,7 +227,10 @@ def __ilshift__(self, other): # '__imatmul__', # '__imod__', - # '__imul__', + + def __imul__(self, other): + dpnp.multiply(self, other, out=self) + return self def __index__(self): return self._array_obj.__index__() @@ -334,8 +356,8 @@ def __rxor__(self, other): # '__setattr__', def __setitem__(self, key, val): - if isinstance(key, dpnp_array): - key = key.get_array() + key = _get_unwrapped_index_key(key) + if isinstance(val, dpnp_array): val = val.get_array() @@ -760,6 +782,8 @@ def item(self, id=None): @property def itemsize(self): """ + Size of one array element in bytes. 
+ """ return self._array_obj.itemsize @@ -785,11 +809,20 @@ def min(self, axis=None, out=None, keepdims=numpy._NoValue, initial=numpy._NoVal return dpnp.min(self, axis, out, keepdims, initial, where) - # 'nbytes', + @property + def nbytes(self): + """ + Total bytes consumed by the elements of the array. + + """ + + return self._array_obj.nbytes @property def ndim(self): """ + Number of array dimensions. + """ return self._array_obj.ndim diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 12d28074b8fb..5bd6f460496f 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -86,22 +86,34 @@ def asarray(x1, usm_type=None, sycl_queue=None): """Converts `x1` to `dpnp_array`.""" - if isinstance(x1, dpnp_array): - x1_obj = x1.get_array() - else: - x1_obj = x1 + dpu.validate_usm_type(usm_type, allow_none=True) - sycl_queue_normalized = dpnp.get_normalized_queue_device(x1_obj, device=device, sycl_queue=sycl_queue) if order is None: order = 'C' """Converts incoming 'x1' object to 'dpnp_array'.""" - array_obj = dpt.asarray(x1_obj, - dtype=dtype, - copy=copy, - order=order, - usm_type=usm_type, - sycl_queue=sycl_queue_normalized) + if isinstance(x1, (list, tuple, range)): + array_obj = dpt.asarray(x1, + dtype=dtype, + copy=copy, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue) + else: + if isinstance(x1, dpnp_array): + x1_obj = x1.get_array() + else: + x1_obj = x1 + + sycl_queue_normalized = dpnp.get_normalized_queue_device(x1_obj, device=device, sycl_queue=sycl_queue) + + array_obj = dpt.asarray(x1_obj, + dtype=dtype, + copy=copy, + order=order, + usm_type=usm_type, + sycl_queue=sycl_queue_normalized) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 9bf456060ddd..6a5bcf239df2 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -272,6 +272,10 @@ def get_dpnp_descriptor(ext_obj, if use_origin_backend(): return False + # It's required to keep track of input object if a non-strided copy is going to be created. + # Thus there will be an extra descriptor allocated to refer on original input. + orig_desc = None + # If input object is a scalar, it means it was allocated on host memory. # We need to copy it to USM memory according to compute follows data paradigm. 
if isscalar(ext_obj): @@ -291,6 +295,7 @@ def get_dpnp_descriptor(ext_obj, ext_obj_offset = 0 if ext_obj.strides != shape_offsets or ext_obj_offset != 0: + orig_desc = dpnp_descriptor(ext_obj) ext_obj = array(ext_obj) # while dpnp functions are based on DPNP_QUEUE @@ -304,7 +309,7 @@ def get_dpnp_descriptor(ext_obj, if not queue_is_default: ext_obj = array(ext_obj, sycl_queue=default_queue) - dpnp_desc = dpnp_descriptor(ext_obj) + dpnp_desc = dpnp_descriptor(ext_obj, orig_desc) if dpnp_desc.is_valid: return dpnp_desc diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py index 36f37f4282ec..92f33bc6310a 100644 --- a/dpnp/dpnp_iface_bitwise.py +++ b/dpnp/dpnp_iface_bitwise.py @@ -62,7 +62,9 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=True, **kwargs): """Choose function to call based on input and call chosen fucntion.""" - if where is not True: + if kwargs: + pass + elif where is not True: pass elif dtype is not None: pass @@ -85,7 +87,7 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T if out is not None: if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None else: out_desc = None @@ -273,7 +275,7 @@ def invert(x, if out is not None: if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None else: out_desc = None return dpnp_invert(x1_desc, out_desc).get_pyobj() diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index a989f745c0a1..2a643fc8469b 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -114,7 +114,7 @@ def dot(x1, x2, out=None, **kwargs): if out is not None: if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None else: out_desc = None return dpnp_dot(x1_desc, x2_desc, out=out_desc).get_pyobj() diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 08de8b2ba5a8..b98e51525d89 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -95,6 +95,41 @@ ] +def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, dtype=None, subok=True, **kwargs): + """Choose function to call based on input and call chosen fucntion.""" + + if kwargs: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + 
alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + if out is not None: + if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): + raise TypeError("return array must be of supported array type") + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None + else: + out_desc = None + + return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() + + return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + + def abs(*args, **kwargs): """ Calculate the absolute value element-wise. @@ -200,7 +235,7 @@ def add(x1, ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. - Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. + Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. @@ -216,29 +251,7 @@ def add(x1, """ - if out is not None: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - - return call_origin(numpy.add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.add, dpnp_add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def around(x1, decimals=0, out=None): @@ -852,7 +865,15 @@ def fmin(*args, **kwargs): return dpnp.minimum(*args, **kwargs) -def fmod(x1, x2, dtype=None, out=None, where=True, **kwargs): +def fmod(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Calculate the element-wise remainder of division. @@ -860,55 +881,30 @@ def fmod(x1, x2, dtype=None, out=None, where=True, **kwargs): Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. - Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. + Parameters `where`, `dtype` and `subok` are supported with their default values. + Keyword argument `kwargs` is currently unsupported. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. See Also -------- - :obj:`dpnp.reminder` : Remainder complementary to floor_divide. 
+ :obj:`dpnp.remainder` : Remainder complementary to floor_divide. :obj:`dpnp.divide` : Standard division. Examples -------- - >>> import dpnp as np - >>> a = np.array([2, -3, 4, 5, -4.5]) - >>> b = np.array([2, 2, 2, 2, 2]) - >>> result = np.fmod(a, b) + >>> import dpnp as dp + >>> a = dp.array([2, -3, 4, 5, -4.5]) + >>> b = dp.array([2, 2, 2, 2, 2]) + >>> result = dp.fmod(a, b) >>> [x for x in result] [0.0, -1.0, 0.0, 1.0, -0.5] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) - - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - return dpnp_fmod(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() - - return call_origin(numpy.fmod, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return _check_nd_call(numpy.fmod, dpnp_fmod, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def gradient(x1, *varargs, **kwargs): @@ -1142,7 +1138,7 @@ def multiply(x1, ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. - Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. + Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. Otherwise the functions will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
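The refactored `_check_nd_call` above gives `add`, `multiply`, `power` and `fmod` a shared path that now honours the `out=` keyword, including output buffers that overlap an input or have a different dtype (a temporary result array is allocated and copied back in those cases). A short, hedged usage sketch of the behaviour the updated docstrings describe; shapes and dtypes below are arbitrary examples:

```python
import dpnp

a = dpnp.arange(10, dtype=dpnp.float32)
b = dpnp.arange(5, 15, dtype=dpnp.float32)

# write the result into a preallocated output array
out = dpnp.empty(10, dtype=dpnp.float32)
dpnp.add(a, b, out=out)

# 'out' may overlap an input: a temporary result is used internally so that
# reads and writes to the shared memory do not race
dpnp.multiply(a[5:], a[:5], out=a[:5])

# the new in-place operators route through the same machinery
a += 1
a *= 2
```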
@@ -1157,29 +1153,7 @@ def multiply(x1, """ - if out is not None: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - - return call_origin(numpy.multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.multiply, dpnp_multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def nancumprod(x1, **kwargs): @@ -1397,34 +1371,7 @@ def power(x1, """ - if where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - if out is not None: - if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): - raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) - else: - out_desc = None - - return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() - - return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.power, dpnp_power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True): diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd index db7127319bb0..0015e8d12c02 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd @@ -116,11 +116,13 @@ cdef class dpnp_descriptor: cdef public: # TODO remove "public" as python accessible attribute object origin_pyobj + dpnp_descriptor origin_desc dict descriptor Py_ssize_t dpnp_descriptor_data_size cpp_bool dpnp_descriptor_is_scalar cdef void * get_data(self) + cdef cpp_bool match_ctype(self, DPNPFuncType ctype) cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except * diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index a94381788764..7a3fb316261d 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -35,6 +35,8 @@ import numpy import dpctl import dpctl.utils as dpu +import dpctl.tensor._copy_utils as dpt_cu +import 
dpctl.tensor._tensor_impl as dpt_ti import dpnp.config as config import dpnp.dpnp_container as dpnp_container @@ -660,9 +662,10 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2): cdef class dpnp_descriptor: - def __init__(self, obj): + def __init__(self, obj, dpnp_descriptor orig_desc=None): """ Initialze variables """ self.origin_pyobj = None + self.origin_desc = None self.descriptor = None self.dpnp_descriptor_data_size = 0 self.dpnp_descriptor_is_scalar = True @@ -681,6 +684,10 @@ cdef class dpnp_descriptor: self.origin_pyobj = obj + """ Keep track of a descriptor with original data """ + if orig_desc is not None and orig_desc.is_valid: + self.origin_desc = orig_desc + """ array size calculation """ cdef Py_ssize_t shape_it = 0 self.dpnp_descriptor_data_size = 1 @@ -740,6 +747,14 @@ cdef class dpnp_descriptor: def is_scalar(self): return self.dpnp_descriptor_is_scalar + @property + def is_temporary(self): + """ + Non-none descriptor of original data means the current descriptor + holds a temporary allocated data. + """ + return self.origin_desc is not None + @property def data(self): if self.is_valid: @@ -771,6 +786,15 @@ cdef class dpnp_descriptor: return interface_dict + def _copy_array_from(self, other_desc): + """ + Fill array data with usm_ndarray of the same shape from other DPNP descriptor + """ + if not isinstance(other_desc, dpnp_descriptor): + raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc))) + + dpt_cu._copy_same_shape(self.get_array(), other_desc.get_array()) + def get_pyobj(self): return self.origin_pyobj @@ -784,6 +808,29 @@ cdef class dpnp_descriptor: "expected either dpctl.tensor.usm_ndarray or dpnp.dpnp_array.dpnp_array, got {}" "".format(type(self.origin_pyobj))) + def get_result_desc(self, result_desc=None): + """ + Copy the result data into an original array + """ + if self.is_temporary: + # Original descriptor is not None, so copy the array data into it and return + from_desc = self if result_desc is None else result_desc + self.origin_desc._copy_array_from(from_desc) + return self.origin_desc + elif result_desc is not None: + # A temporary result descriptor was allocated, needs to copy data back into 'out' descriptor + self._copy_array_from(result_desc) + return self + + def is_array_overlapped(self, other_desc): + """ + Check if usm_ndarray overlaps an array from other DPNP descriptor + """ + if not isinstance(other_desc, dpnp_descriptor): + raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc))) + + return dpt_ti._array_overlap(self.get_array(), other_desc.get_array()) + cdef void * get_data(self): cdef Py_ssize_t item_size = 0 cdef Py_ssize_t elem_offset = 0 @@ -798,6 +845,9 @@ cdef class dpnp_descriptor: return < void * > val + cdef cpp_bool match_ctype(self, DPNPFuncType ctype): + return self.dtype == dpnp_DPNPFuncType_to_dtype(< size_t > ctype) + def __bool__(self): return self.is_valid diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 2e7a5a6d6f3e..08a59798d99b 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -715,19 +715,15 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), 
arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_457_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_459_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_461_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_463_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_465_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_467_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_469_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_537_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_539_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_541_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_545_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_547_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_549_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, 
name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 41128fd70e2d..fb49d8c87495 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -6,10 +6,70 @@ import numpy from numpy.testing import ( - assert_array_equal + assert_, + assert_array_equal, + assert_equal ) +class TestIndexing: + def test_ellipsis_index(self): + a = dpnp.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + assert_(a[...] is not a) + assert_equal(a[...], a) + + # test that slicing with ellipsis doesn't skip an arbitrary number of dimensions + assert_equal(a[0, ...], a[0]) + assert_equal(a[0, ...], a[0,:]) + assert_equal(a[..., 0], a[:, 0]) + + # test that slicing with ellipsis always results in an array + assert_equal(a[0, ..., 1], dpnp.array(2)) + + # assignment with `(Ellipsis,)` on 0-d arrays + b = dpnp.array(1) + b[(Ellipsis,)] = 2 + assert_equal(b, 2) + + def test_boolean_indexing_list(self): + a = dpnp.array([1, 2, 3]) + b = dpnp.array([True, False, True]) + + assert_equal(a[b], [1, 3]) + assert_equal(a[None, b], [[1, 3]]) + + def test_indexing_array_weird_strides(self): + np_x = numpy.ones(10) + dp_x = dpnp.ones(10) + + np_ind = numpy.arange(10)[:, None, None, None] + np_ind = numpy.broadcast_to(np_ind, (10, 55, 4, 4)) + + dp_ind = dpnp.arange(10)[:, None, None, None] + dp_ind = dpnp.broadcast_to(dp_ind, (10, 55, 4, 4)) + + # single advanced index case + assert_array_equal(dp_x[dp_ind], np_x[np_ind]) + + np_x2 = numpy.ones((10, 2)) + dp_x2 = dpnp.ones((10, 2)) + + np_zind = numpy.zeros(4, dtype=np_ind.dtype) + dp_zind = dpnp.zeros(4, dtype=dp_ind.dtype) + + # higher dimensional advanced index + assert_array_equal(dp_x2[dp_ind, dp_zind], np_x2[np_ind, np_zind]) + + def test_indexing_array_negative_strides(self): + arr = dpnp.zeros((4, 4))[::-1, ::-1] + + slices = (slice(None), dpnp.array([0, 1, 2, 3])) + arr[slices] = 10 + assert_array_equal(arr, 10.) 
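The new `TestIndexing` cases above exercise `_get_unwrapped_index_key`, which unwraps dpnp arrays nested inside an index key (including tuples mixing slices and arrays) before handing the key to dpctl's advanced indexing. A brief illustration of the indexing patterns this enables; the values in the comments assume NumPy-compatible semantics:

```python
import dpnp

a = dpnp.arange(12).reshape(3, 4)

# boolean mask used directly as a key
mask = dpnp.array([True, False, True])
rows = a[mask]                        # rows 0 and 2

# integer index array combined with a slice inside a tuple key
idx = dpnp.array([0, 2])
cols = a[:, idx]                      # columns 0 and 2

# assignment through an advanced index, as in test_indexing_array_negative_strides
a[(slice(None), dpnp.array([1, 3]))] = 0
```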
+ + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_choose(): a = numpy.r_[:4] diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index e58e129c03b3..a5449e2cf6f0 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -1,6 +1,7 @@ import pytest from .helper import ( get_all_dtypes, + get_float_complex_dtypes, is_cpu_device, is_win_platform ) @@ -71,11 +72,14 @@ def test_diff(array): @pytest.mark.parametrize("dtype1", get_all_dtypes()) @pytest.mark.parametrize("dtype2", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'divide', 'multiply', 'power', 'subtract']) + ['add', 'divide', 'fmod', 'multiply', 'power', 'subtract']) @pytest.mark.parametrize("data", [[[1, 2], [3, 4]]], ids=['[[1, 2], [3, 4]]']) def test_op_multiple_dtypes(dtype1, func, dtype2, data): + if func == 'fmod' and (dpnp.issubdtype(dtype1, dpnp.complexfloating) or dpnp.issubdtype(dtype2, dpnp.complexfloating)): + pytest.skip("no support of fmod for complex") + np_a = numpy.array(data, dtype=dtype1) dpnp_a = dpnp.array(data, dtype=dtype1) @@ -141,9 +145,18 @@ def test_copysign(self, dtype, lhs, rhs): def test_divide(self, dtype, lhs, rhs): self._test_mathematical('divide', dtype, lhs, rhs) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_fmod(self, dtype, lhs, rhs): + if dtype == dpnp.float32 and rhs == 0.3: + """ + Due to some reason NumPy behaves incorrectly, when: + >>> numpy.fmod(numpy.array([3.9], dtype=numpy.float32), 0.3) + array([0.29999995], dtype=float32) + while dpnp returns something around zero which is expected: + >>> dpnp.fmod(dpnp.array([3.9], dtype=dpnp.float32), 0.3) + array([9.53674318e-08]) + """ + pytest.skip("missaligned with numpy results") self._test_mathematical('fmod', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -216,6 +229,9 @@ def test_op_with_scalar(array, val, func, data_type, val_type): elif is_cpu_device() and data_type == dpnp.complex128: # TODO: discuss the bahavior with OneMKL team pytest.skip("(0j ** 5) is different: (NaN + NaNj) in dpnp and (0j) in numpy") + elif func == 'fmod' and ((data_type is None or not dpnp.issubdtype(data_type, dpnp.floating)) and + (val_type != float or dpnp.issubdtype(data_type, dpnp.complexfloating))): + pytest.skip("dpnp.fmod(a, 0) != 0 for integer a, like it's in numpy") if func == 'subtract' and val_type == bool and data_type == dpnp.bool: with pytest.raises(TypeError): @@ -634,34 +650,232 @@ def test_invalid_shape(self, shape): dpnp.trunc(dp_array, out=dp_out) -class TestPower: +class TestAdd: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_add(self, dtype): + array1_data = numpy.arange(10) + array2_data = numpy.arange(5, 15) + out = numpy.empty(10, dtype=dtype) + + # DPNP + dp_array1 = dpnp.array(array1_data, dtype=dtype) + dp_array2 = dpnp.array(array2_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) + result = dpnp.add(dp_array1, dp_array2, out=dp_out) + + # original + np_array1 = numpy.array(array1_data, dtype=dtype) + np_array2 = numpy.array(array2_data, dtype=dtype) + expected = numpy.add(np_array1, np_array2, out=out) + + assert_allclose(expected, result) + assert_allclose(out, dp_out) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 10 + + np_array1 = numpy.arange(size, 2 * size, dtype=dtype) + np_array2 = numpy.arange(size, dtype=dtype) 
+ np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.add(np_array1, np_array2, out=np_out) - def test_power(self): + dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) + dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.add(dp_array1, dp_array2, out=dp_out) + + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_overlap(self, dtype): + size = 1 if dtype == dpnp.bool else 15 + + np_a = numpy.arange(2 * size, dtype=dtype) + expected = numpy.add(np_a[size::], np_a[::2], out=np_a[:size:]) + + dp_a = dpnp.arange(2 * size, dtype=dtype) + result = dpnp.add(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(expected, result) + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True)) + def test_inplace_strided_out(self, dtype): + size = 21 + + np_a = numpy.arange(size, dtype=dtype) + np_a[::3] += 4 + + dp_a = dpnp.arange(size, dtype=dtype) + dp_a[::3] += 4 + + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("shape", + [(0,), (15, ), (2, 2)], + ids=['(0,)', '(15, )', '(2,2)']) + def test_invalid_shape(self, shape): + dp_array1 = dpnp.arange(10, dtype=dpnp.float64) + dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) + dp_out = dpnp.empty(shape, dtype=dpnp.float64) + + with pytest.raises(ValueError): + dpnp.add(dp_array1, dp_array2, out=dp_out) + + @pytest.mark.parametrize("out", + [4, (), [], (3, 7), [2, 4]], + ids=['4', '()', '[]', '(3, 7)', '[2, 4]']) + def test_invalid_out(self, out): + a = dpnp.arange(10) + + assert_raises(TypeError, dpnp.add, a, 2, out) + assert_raises(TypeError, numpy.add, a.asnumpy(), 2, out) + + +class TestMultiply: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_multiply(self, dtype): array1_data = numpy.arange(10) array2_data = numpy.arange(5, 15) - out = numpy.empty(10, dtype=numpy.float64) + out = numpy.empty(10, dtype=dtype) # DPNP - dp_array1 = dpnp.array(array1_data, dtype=dpnp.float64) - dp_array2 = dpnp.array(array2_data, dtype=dpnp.float64) - dp_out = dpnp.array(out, dtype=dpnp.float64) + dp_array1 = dpnp.array(array1_data, dtype=dtype) + dp_array2 = dpnp.array(array2_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) + result = dpnp.multiply(dp_array1, dp_array2, out=dp_out) + + # original + np_array1 = numpy.array(array1_data, dtype=dtype) + np_array2 = numpy.array(array2_data, dtype=dtype) + expected = numpy.multiply(np_array1, np_array2, out=out) + + assert_allclose(expected, result) + assert_allclose(out, dp_out) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 10 + + np_array1 = numpy.arange(size, 2 * size, dtype=dtype) + np_array2 = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.multiply(np_array1, np_array2, out=np_out) + + dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) + dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.multiply(dp_array1, dp_array2, out=dp_out) + + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_overlap(self, dtype): + size = 1 if dtype == dpnp.bool else 15 + + np_a = numpy.arange(2 * size, dtype=dtype) + expected = numpy.multiply(np_a[size::], np_a[::2], out=np_a[:size:]) + + dp_a = dpnp.arange(2 * size, 
dtype=dtype) + result = dpnp.multiply(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(expected, result) + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True)) + def test_inplace_strided_out(self, dtype): + size = 21 + + np_a = numpy.arange(size, dtype=dtype) + np_a[::3] *= 4 + + dp_a = dpnp.arange(size, dtype=dtype) + dp_a[::3] *= 4 + + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("shape", + [(0,), (15, ), (2, 2)], + ids=['(0,)', '(15, )', '(2,2)']) + def test_invalid_shape(self, shape): + dp_array1 = dpnp.arange(10, dtype=dpnp.float64) + dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) + dp_out = dpnp.empty(shape, dtype=dpnp.float64) + + with pytest.raises(ValueError): + dpnp.multiply(dp_array1, dp_array2, out=dp_out) + + @pytest.mark.parametrize("out", + [4, (), [], (3, 7), [2, 4]], + ids=['4', '()', '[]', '(3, 7)', '[2, 4]']) + def test_invalid_out(self, out): + a = dpnp.arange(10) + + assert_raises(TypeError, dpnp.multiply, a, 2, out) + assert_raises(TypeError, numpy.multiply, a.asnumpy(), 2, out) + + +class TestPower: + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_power(self, dtype): + array1_data = numpy.arange(10) + array2_data = numpy.arange(5, 15) + out = numpy.empty(10, dtype=dtype) + + # DPNP + dp_array1 = dpnp.array(array1_data, dtype=dtype) + dp_array2 = dpnp.array(array2_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) result = dpnp.power(dp_array1, dp_array2, out=dp_out) # original - np_array1 = numpy.array(array1_data, dtype=numpy.float64) - np_array2 = numpy.array(array2_data, dtype=numpy.float64) + np_array1 = numpy.array(array1_data, dtype=dtype) + np_array2 = numpy.array(array2_data, dtype=dtype) expected = numpy.power(np_array1, np_array2, out=out) - assert_array_equal(expected, result) + assert_allclose(expected, result) @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_none=True)) - def test_invalid_dtype(self, dtype): - dp_array1 = dpnp.arange(10, dtype=dpnp.complex64) - dp_array2 = dpnp.arange(5, 15, dtype=dpnp.complex64) - dp_out = dpnp.empty(10, dtype=dtype) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 5 - with pytest.raises(ValueError): - dpnp.power(dp_array1, dp_array2, out=dp_out) + np_array1 = numpy.arange(size, 2 * size, dtype=dtype) + np_array2 = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.power(np_array1, np_array2, out=np_out) + + dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) + dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.power(dp_array1, dp_array2, out=dp_out) + + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) + def test_out_overlap(self, dtype): + size = 5 + + np_a = numpy.arange(2 * size, dtype=dtype) + expected = numpy.power(np_a[size::], np_a[::2], out=np_a[:size:]) + + dp_a = dpnp.arange(2 * size, dtype=dtype) + result = dpnp.power(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(expected, result) + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) + def test_inplace_strided_out(self, dtype): + size = 5 + + np_a = numpy.arange(2 * size, dtype=dtype) + np_a[::3] **= 3 + + dp_a = dpnp.arange(2 * size, dtype=dtype) + dp_a[::3] **= 3 + + assert_allclose(dp_a, np_a) 
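The `test_inplace_strided_out` cases rely on the newly added `__iadd__`/`__imul__` operators together with the strided `out=` support: an in-place update through a strided view must be written back into the original buffer just as NumPy does. A compact sketch of the check being made; the element-wise agreement with NumPy is the expected outcome, not a guarantee of bit-identical floating-point results:

```python
import numpy
import dpnp

np_a = numpy.arange(12, dtype=numpy.float32)
dp_a = dpnp.arange(12, dtype=dpnp.float32)

# update every third element in place through a strided view
np_a[::3] *= 4
dp_a[::3] *= 4

# dpnp is expected to reproduce the NumPy result element-wise
numpy.testing.assert_allclose(dp_a.asnumpy(), np_a)
```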
@pytest.mark.parametrize("shape", [(0,), (15, ), (2, 2)], diff --git a/tests/test_strides.py b/tests/test_strides.py index e56e9befeee4..849e7313f643 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -203,14 +203,113 @@ def test_strides_fmod(dtype, shape): @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) -def test_strides_true_devide(dtype, shape): +def test_strides_true_divide(dtype, shape): a = numpy.arange(numpy.prod(shape), dtype=dtype).reshape(shape) b = a.T + 1 dpa = dpnp.reshape(dpnp.arange(numpy.prod(shape), dtype=dtype), shape) dpb = dpa.T + 1 - result = dpnp.fmod(dpa, dpb) - expected = numpy.fmod(a, b) + result = dpnp.true_divide(dpa, dpb) + expected = numpy.true_divide(a, b) assert_allclose(result, expected) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strided_out_2args(func_name, dtype): + np_out = numpy.ones((5, 3, 2))[::3] + np_a = numpy.arange(numpy.prod(np_out.shape), dtype=dtype).reshape(np_out.shape) + np_b = numpy.full(np_out.shape, fill_value=0.7, dtype=dtype) + + dp_out = dpnp.ones((5, 3, 2))[::3] + dp_a = dpnp.array(np_a) + dp_b = dpnp.array(np_b) + + np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out) + dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out) + + assert_allclose(dp_res.asnumpy(), np_res) + assert_allclose(dp_out.asnumpy(), np_out) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strided_in_out_2args(func_name, dtype): + sh = (3, 4, 2) + prod = numpy.prod(sh) + + np_out = numpy.ones(sh, dtype=dtype)[::2] + np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2] + np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2].T + + dp_out = dpnp.ones(sh, dtype=dtype)[::2] + dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2] + dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2].T + + np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out) + dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strided_in_out_2args_diff_out_dtype(func_name, dtype): + sh = (3, 3, 2) + prod = numpy.prod(sh) + + np_out = numpy.ones(sh, dtype=numpy.complex64)[::2] + np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2].T + np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2] + + dp_out = dpnp.ones(sh, dtype=dpnp.complex64)[::2] + dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2].T + dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2] + + np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out) + dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +def test_strided_in_2args_overlap(func_name, dtype): + size = 5 + + np_a = numpy.arange(2 * size, dtype=dtype) + dp_a = dpnp.arange(2 * size, dtype=dtype) + + np_res = _getattr(numpy, func_name)(np_a[size::], np_a[::2], out=np_a[:size:]) + dp_res = _getattr(dpnp, 
func_name)(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +def test_strided_in_out_2args_overlap(func_name, dtype): + sh = (4, 3, 2) + prod = numpy.prod(sh) + + np_a = numpy.arange(prod, dtype=dtype).reshape(sh) + np_b = numpy.full(np_a[::2].shape, fill_value=0.7, dtype=dtype) + + dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh) + dp_b = dpnp.full(dp_a[::2].shape, fill_value=0.7, dtype=dtype) + + np_res = _getattr(numpy, func_name)(np_a[::2], np_b, out=np_a[1::2]) + dp_res = _getattr(dpnp, func_name)(dp_a[::2], dp_b, out=dp_a[1::2]) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index a523c46465bf..ab974e426f93 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -945,3 +945,15 @@ def test_broadcast_to(device): x = dpnp.arange(5, device=device) y = dpnp.broadcast_to(x, (3, 5)) assert_sycl_queue_equal(x.sycl_queue, y.sycl_queue) + + +@pytest.mark.parametrize("device_x", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +@pytest.mark.parametrize("device_y", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_asarray(device_x, device_y): + x = dpnp.array([1, 2, 3], device=device_x) + y = dpnp.asarray([x, x, x], device=device_y) + assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue) diff --git a/tests/test_umath.py b/tests/test_umath.py index 6122b253ca37..3a1f4467dcea 100644 --- a/tests/test_umath.py +++ b/tests/test_umath.py @@ -1,6 +1,14 @@ import pytest +from .helper import ( + get_all_dtypes +) import numpy +from numpy.testing import ( + assert_allclose, + assert_array_equal +) + import dpnp # full list of umaths @@ -71,7 +79,7 @@ def test_umaths(test_cases): # DPNP result = getattr(dpnp, umath)(*iargs) - numpy.testing.assert_allclose(result, expected, rtol=1e-6) + assert_allclose(result, expected, rtol=1e-6) class TestSin: @@ -89,7 +97,7 @@ def test_sin_ordinary(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.sin(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -129,7 +137,7 @@ def test_cos(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.cos(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -169,7 +177,7 @@ def test_log(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.log(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -209,7 +217,7 @@ def test_exp(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.exp(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -249,7 +257,7 @@ def test_arcsin(self): np_array = 
numpy.array(array_data, dtype=numpy.float64) expected = numpy.arcsin(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -289,7 +297,7 @@ def test_arctan(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.arctan(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -329,7 +337,7 @@ def test_tan(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.tan(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -355,7 +363,6 @@ def test_invalid_shape(self, shape): class TestArctan2: - def test_arctan2(self): array_data = numpy.arange(10) out = numpy.empty(10, dtype=numpy.float64) @@ -369,18 +376,21 @@ def test_arctan2(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.arctan2(np_array, np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) - @pytest.mark.parametrize("dtype", - [numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float32', 'numpy.int64', 'numpy.int32']) - def test_invalid_dtype(self, dtype): + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 10 - dp_array = dpnp.arange(10, dtype=dpnp.float64) - dp_out = dpnp.empty(10, dtype=dtype) + np_array = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.arctan2(np_array, np_array, out=np_out) - with pytest.raises(ValueError): - dpnp.arctan2(dp_array, dp_array, out=dp_out) + dp_array = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.arctan2(dp_array, dp_array, out=dp_out) + + assert_allclose(expected, result) @pytest.mark.parametrize("shape", [(0,), (15, ), (2, 2)], diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 817bdee66a57..06c7f2b5cb24 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -18,6 +18,8 @@ def test_coerced_usm_types_sum(usm_type_x, usm_type_y): y = dp.arange(1000, usm_type = usm_type_y) z = 1.3 + x + y + 2 + z += x + z += 7.4 assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y @@ -31,6 +33,8 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): y = dp.arange(10, usm_type = usm_type_y) z = 3 * x * y * 1.5 + z *= x + z *= 4.8 assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y @@ -180,6 +184,9 @@ def test_meshgrid(usm_type_x, usm_type_y): pytest.param("dot", [[0., 1., 2.], [3., 4., 5.]], [[4., 4.], [4., 4.], [4., 4.]]), + pytest.param("fmod", + [-3., -2., -1., 1., 2., 3.], + [2., 2., 2., 2., 2., 2.]), ], ) @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 39dc3e10f721..3150e4da99e0 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -145,6 +145,14 @@ def check_binary(self, xp): if dtype1 in (numpy.float16, numpy.float32): y = y.astype(numpy.complex64) + if xp is cupy and 
not xp.isscalar(y) and not self.use_dtype: + if self.name == 'fmod': + # TODO: Fix this: fmod(a, 0) + # numpy => 0 + # cupy => 2147483647 + if not (dtype1 in float_types or dtype2 in float_types) and (np2 == 0).any(): + y[xp.broadcast_to(xp.array(arg2), y.shape) == 0] = 0 + # NumPy returns an output array of another type than DPNP when input ones have diffrent types. if xp is cupy and dtype1 != dtype2 and not self.use_dtype: is_array_arg1 = not xp.isscalar(arg1) @@ -153,7 +161,7 @@ def check_binary(self, xp): is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) - if self.name in ('add', 'multiply', 'power', 'subtract'): + if self.name in ('add', 'fmod', 'multiply', 'power', 'subtract'): if is_array_arg1 and is_array_arg2: # If both inputs are arrays where one is of floating type and another - integer, # NumPy will return an output array of always "float64" type,
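The masking applied above works around two known divergences for integer `fmod`: NumPy defines `fmod(x, 0)` as 0 (emitting a runtime warning) while the device-side integer `%` kernel gives no meaningful value there, and NumPy promotes mixed-kind array arguments to `float64`. A plain-NumPy sketch of the zero-divisor masking, assuming the same test arrays that appear in the skip list above:

```python
import numpy

arg1 = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=numpy.int32)
arg2 = numpy.array([[0, 1, 2], [3, 4, 5]], dtype=numpy.int32)

with numpy.errstate(divide='ignore'):
    y = numpy.fmod(arg1, arg2)        # NumPy yields 0 where the divisor is 0

# mirror of the test adjustment: zero out positions with a zero divisor before
# comparing, since the integer '%' result is unspecified there
y[numpy.broadcast_to(arg2, y.shape) == 0] = 0
```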