From 369c500c0851e94c15b89ac197509b4eabd6dde2 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 16 Aug 2023 00:02:11 -0700 Subject: [PATCH 1/8] Binary elementwise functions can now act on any input in-place - A temporary will be allocated as necessary (i.e., when arrays overlap, are not going to be cast, and are not the same logical arrays) - Uses dedicated in-place kernels where they are implemented - Now called directly by Python operators - Removes _inplace method of BinaryElementwiseFunc class - Removes _find_inplace_dtype function --- dpctl/tensor/_elementwise_common.py | 249 +++++++----------- dpctl/tensor/_type_utils.py | 18 -- dpctl/tensor/_usmarray.pyx | 9 +- .../source/elementwise_functions.hpp | 11 +- 4 files changed, 110 insertions(+), 177 deletions(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index f924ee31cd..8ecd5f585e 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -31,7 +31,6 @@ _acceptance_fn_default, _find_buf_dtype, _find_buf_dtype2, - _find_inplace_dtype, _to_device_supported_dtype, ) @@ -383,14 +382,6 @@ def __repr__(self): return f"<{self.__name__} '{self.name_}'>" def __call__(self, o1, o2, out=None, order="K"): - # FIXME: replace with check against base array - # when views can be identified - if self.binary_inplace_fn_: - if o1 is out: - return self._inplace(o1, o2) - elif o2 is out: - return self._inplace(o2, o1) - if order not in ["K", "C", "F", "A"]: order = "K" q1, o1_usm_type = _get_queue_usm_type(o1) @@ -472,6 +463,7 @@ def __call__(self, o1, o2, out=None, order="K"): "supported types according to the casting rule ''safe''." 
) + orig_out = out if out is not None: if not isinstance(out, dpt.usm_ndarray): raise TypeError( @@ -484,19 +476,76 @@ def __call__(self, o1, o2, out=None, order="K"): f"Expected output shape is {o1_shape}, got {out.shape}" ) - if ti._array_overlap(o1, out) or ti._array_overlap(o2, out): - raise TypeError("Input and output arrays have memory overlap") + if res_dt != out.dtype: + raise TypeError( + f"Output array of type {res_dt} is needed," + f"got {out.dtype}" + ) if ( - dpctl.utils.get_execution_queue( - (o1.sycl_queue, o2.sycl_queue, out.sycl_queue) - ) + dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None ): raise TypeError( "Input and output allocation queues are not compatible" ) + if isinstance(o1, dpt.usm_ndarray): + if ti._array_overlap(o1, out) and buf1_dt is None: + if not ti._same_logical_tensors(o1, out): + out = dpt.empty_like(out) + elif self.binary_inplace_fn_ is not None: + # if there is a dedicated in-place kernel + # it can be called here, otherwise continues + if isinstance(o2, dpt.usm_ndarray): + src2 = o2 + if ( + ti._array_overlap(o2, out) + and not ti._same_logical_tensors(o2, out) + and buf2_dt is None + ): + buf2_dt = o2_dtype + else: + src2 = dpt.asarray( + o2, dtype=o2_dtype, sycl_queue=exec_q + ) + if buf2_dt is None: + src2 = dpt.broadcast_to(src2, res_shape) + ht_, _ = self.binary_inplace_fn_( + lhs=o1, rhs=src2, sycl_queue=exec_q + ) + ht_.wait() + else: + buf2 = dpt.empty_like(src2, dtype=buf2_dt) + ( + ht_copy_ev, + copy_ev, + ) = ti._copy_usm_ndarray_into_usm_ndarray( + src=src2, dst=buf2, sycl_queue=exec_q + ) + + buf2 = dpt.broadcast_to(buf2, res_shape) + ht_, _ = self.binary_inplace_fn_( + lhs=o1, + rhs=buf2, + sycl_queue=exec_q, + depends=[copy_ev], + ) + ht_copy_ev.wait() + ht_.wait() + + return out + + if isinstance(o2, dpt.usm_ndarray): + if ( + ti._array_overlap(o2, out) + and not ti._same_logical_tensors(o2, out) + and buf2_dt is None + ): + # should not reach if out is reallocated + # after being checked 
against o1 + out = dpt.empty_like(out) + if isinstance(o1, dpt.usm_ndarray): src1 = o1 else: @@ -532,19 +581,23 @@ def __call__(self, o1, o2, out=None, order="K"): sycl_queue=exec_q, order=order, ) - else: - if res_dt != out.dtype: - raise TypeError( - f"Output array of type {res_dt} is needed," - f"got {out.dtype}" - ) src1 = dpt.broadcast_to(src1, res_shape) src2 = dpt.broadcast_to(src2, res_shape) - ht_, _ = self.binary_fn_( + ht_binary_ev, binary_ev = self.binary_fn_( src1=src1, src2=src2, dst=out, sycl_queue=exec_q ) - ht_.wait() + if not (orig_out is None or orig_out is out): + # Copy the out data from temporary buffer to original memory + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=out, + dst=orig_out, + sycl_queue=exec_q, + depends=[binary_ev], + ) + ht_copy_out_ev.wait() + out = orig_out + ht_binary_ev.wait() return out elif buf1_dt is None: if order == "K": @@ -578,15 +631,25 @@ def __call__(self, o1, o2, out=None, order="K"): src1 = dpt.broadcast_to(src1, res_shape) buf2 = dpt.broadcast_to(buf2, res_shape) - ht_, _ = self.binary_fn_( + ht_binary_ev, binary_ev = self.binary_fn_( src1=src1, src2=buf2, dst=out, sycl_queue=exec_q, depends=[copy_ev], ) + if not (orig_out is None or orig_out is out): + # Copy the out data from temporary buffer to original memory + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=out, + dst=orig_out, + sycl_queue=exec_q, + depends=[binary_ev], + ) + ht_copy_out_ev.wait() + out = orig_out ht_copy_ev.wait() - ht_.wait() + ht_binary_ev.wait() return out elif buf2_dt is None: if order == "K": @@ -611,24 +674,28 @@ def __call__(self, o1, o2, out=None, order="K"): sycl_queue=exec_q, order=order, ) - else: - if res_dt != out.dtype: - raise TypeError( - f"Output array of type {res_dt} is needed," - f"got {out.dtype}" - ) buf1 = dpt.broadcast_to(buf1, res_shape) src2 = dpt.broadcast_to(src2, res_shape) - ht_, _ = self.binary_fn_( + ht_binary_ev, binary_ev = self.binary_fn_( src1=buf1, src2=src2, 
dst=out, sycl_queue=exec_q, depends=[copy_ev], ) + if not (orig_out is None or orig_out is out): + # Copy the out data from temporary buffer to original memory + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=out, + dst=orig_out, + sycl_queue=exec_q, + depends=[binary_ev], + ) + ht_copy_out_ev.wait() + out = orig_out ht_copy_ev.wait() - ht_.wait() + ht_binary_ev.wait() return out if order in ["K", "A"]: @@ -665,11 +732,6 @@ def __call__(self, o1, o2, out=None, order="K"): sycl_queue=exec_q, order=order, ) - else: - if res_dt != out.dtype: - raise TypeError( - f"Output array of type {res_dt} is needed, got {out.dtype}" - ) buf1 = dpt.broadcast_to(buf1, res_shape) buf2 = dpt.broadcast_to(buf2, res_shape) @@ -682,116 +744,3 @@ def __call__(self, o1, o2, out=None, order="K"): ) dpctl.SyclEvent.wait_for([ht_copy1_ev, ht_copy2_ev, ht_]) return out - - def _inplace(self, lhs, val): - if self.binary_inplace_fn_ is None: - raise ValueError( - f"In-place operation not supported for ufunc '{self.name_}'" - ) - if not isinstance(lhs, dpt.usm_ndarray): - raise TypeError( - f"Expected dpctl.tensor.usm_ndarray, got {type(lhs)}" - ) - q1, lhs_usm_type = _get_queue_usm_type(lhs) - q2, val_usm_type = _get_queue_usm_type(val) - if q2 is None: - exec_q = q1 - usm_type = lhs_usm_type - else: - exec_q = dpctl.utils.get_execution_queue((q1, q2)) - if exec_q is None: - raise ExecutionPlacementError( - "Execution placement can not be unambiguously inferred " - "from input arguments." - ) - usm_type = dpctl.utils.get_coerced_usm_type( - ( - lhs_usm_type, - val_usm_type, - ) - ) - dpctl.utils.validate_usm_type(usm_type, allow_none=False) - lhs_shape = _get_shape(lhs) - val_shape = _get_shape(val) - if not all( - isinstance(s, (tuple, list)) - for s in ( - lhs_shape, - val_shape, - ) - ): - raise TypeError( - "Shape of arguments can not be inferred. 
" - "Arguments are expected to be " - "lists, tuples, or both" - ) - try: - res_shape = _broadcast_shape_impl( - [ - lhs_shape, - val_shape, - ] - ) - except ValueError: - raise ValueError( - "operands could not be broadcast together with shapes " - f"{lhs_shape} and {val_shape}" - ) - if res_shape != lhs_shape: - raise ValueError( - f"output shape {lhs_shape} does not match " - f"broadcast shape {res_shape}" - ) - sycl_dev = exec_q.sycl_device - lhs_dtype = lhs.dtype - val_dtype = _get_dtype(val, sycl_dev) - if not _validate_dtype(val_dtype): - raise ValueError("Input operand of unsupported type") - - lhs_dtype, val_dtype = _resolve_weak_types( - lhs_dtype, val_dtype, sycl_dev - ) - - buf_dt = _find_inplace_dtype( - lhs_dtype, val_dtype, self.result_type_resolver_fn_, sycl_dev - ) - - if buf_dt is None: - raise TypeError( - f"In-place '{self.name_}' does not support input types " - f"({lhs_dtype}, {val_dtype}), " - "and the inputs could not be safely coerced to any " - "supported types according to the casting rule ''safe''." 
- ) - - if isinstance(val, dpt.usm_ndarray): - rhs = val - overlap = ti._array_overlap(lhs, rhs) - else: - rhs = dpt.asarray(val, dtype=val_dtype, sycl_queue=exec_q) - overlap = False - - if buf_dt == val_dtype and overlap is False: - rhs = dpt.broadcast_to(rhs, res_shape) - ht_, _ = self.binary_inplace_fn_( - lhs=lhs, rhs=rhs, sycl_queue=exec_q - ) - ht_.wait() - - else: - buf = dpt.empty_like(rhs, dtype=buf_dt) - ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=rhs, dst=buf, sycl_queue=exec_q - ) - - buf = dpt.broadcast_to(buf, res_shape) - ht_, _ = self.binary_inplace_fn_( - lhs=lhs, - rhs=buf, - sycl_queue=exec_q, - depends=[copy_ev], - ) - ht_copy_ev.wait() - ht_.wait() - - return lhs diff --git a/dpctl/tensor/_type_utils.py b/dpctl/tensor/_type_utils.py index b576764689..7f496b02fa 100644 --- a/dpctl/tensor/_type_utils.py +++ b/dpctl/tensor/_type_utils.py @@ -226,27 +226,9 @@ def _find_buf_dtype2(arg1_dtype, arg2_dtype, query_fn, sycl_dev, acceptance_fn): return None, None, None -def _find_inplace_dtype(lhs_dtype, rhs_dtype, query_fn, sycl_dev): - res_dt = query_fn(lhs_dtype, rhs_dtype) - if res_dt and res_dt == lhs_dtype: - return rhs_dtype - - _fp16 = sycl_dev.has_aspect_fp16 - _fp64 = sycl_dev.has_aspect_fp64 - all_dts = _all_data_types(_fp16, _fp64) - for buf_dt in all_dts: - if _can_cast(rhs_dtype, buf_dt, _fp16, _fp64): - res_dt = query_fn(lhs_dtype, buf_dt) - if res_dt and res_dt == lhs_dtype: - return buf_dt - - return None - - __all__ = [ "_find_buf_dtype", "_find_buf_dtype2", - "_find_inplace_dtype", "_to_device_supported_dtype", "_acceptance_fn_default", "_acceptance_fn_divide", diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 1336063323..ebee97b5f2 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -1286,8 +1286,7 @@ cdef class usm_ndarray: return _dispatch_binary_elementwise2(other, "bitwise_xor", self) def __iadd__(self, other): - from ._elementwise_funcs import add - return 
add._inplace(self, other) + return dpctl.tensor.add(self, other, out=self) def __iand__(self, other): res = self.__and__(other) @@ -1325,8 +1324,7 @@ cdef class usm_ndarray: return self def __imul__(self, other): - from ._elementwise_funcs import multiply - return multiply._inplace(self, other) + return dpctl.tensor.multiply(self, other, out=self) def __ior__(self, other): res = self.__or__(other) @@ -1350,8 +1348,7 @@ cdef class usm_ndarray: return self def __isub__(self, other): - from ._elementwise_funcs import subtract - return subtract._inplace(self, other) + return dpctl.tensor.subtract(self, other, out=self) def __itruediv__(self, other): res = self.__truediv__(other) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions.hpp index 6c3543fb76..5c4b50bbc0 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.hpp @@ -379,9 +379,12 @@ std::pair py_binary_ufunc( } } - // check memory overlap auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - if (overlap(src1, dst) || overlap(src2, dst)) { + auto const &same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); + if ((overlap(src1, dst) && !same_logical_tensors(src1, dst)) || + (overlap(src2, dst) && !same_logical_tensors(src2, dst))) + { throw py::value_error("Arrays index overlapping segments of memory"); } // check memory overlap @@ -678,8 +681,10 @@ py_binary_inplace_ufunc(dpctl::tensor::usm_ndarray lhs, } // check memory overlap + auto const &same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - if (overlap(rhs, lhs)) { + if (overlap(rhs, lhs) && !same_logical_tensors(rhs, lhs)) { throw py::value_error("Arrays index overlapping segments of memory"); } // check memory overlap From fc9a8da63dc1c526298c598c476a6d10c49e65c7 Mon Sep 17 00:00:00 2001 From: Nikita 
Grigorian Date: Wed, 16 Aug 2023 16:17:05 -0700 Subject: [PATCH 2/8] Tests for new out parameter behavior for add --- dpctl/tests/elementwise/test_add.py | 42 ++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/dpctl/tests/elementwise/test_add.py b/dpctl/tests/elementwise/test_add.py index 8ba100d219..cd4e099064 100644 --- a/dpctl/tests/elementwise/test_add.py +++ b/dpctl/tests/elementwise/test_add.py @@ -381,17 +381,35 @@ def test_add_inplace_dtype_matrix(op1_dtype, op2_dtype): dpt.asnumpy(ar1) == np.full(ar1.shape, 2, dtype=ar1.dtype) ).all() - ar3 = dpt.ones(sz, dtype=op1_dtype) - ar4 = dpt.ones(2 * sz, dtype=op2_dtype) - - ar3[::-1] += ar4[::2] + ar3 = dpt.ones(sz, dtype=op1_dtype)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype)[::2] + ar3 += ar4 assert ( dpt.asnumpy(ar3) == np.full(ar3.shape, 2, dtype=ar3.dtype) ).all() - else: with pytest.raises(TypeError): ar1 += ar2 + dpt.add(ar1, ar2, out=ar1) + + # out is second arg + ar1 = dpt.ones(sz, dtype=op1_dtype) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype) + if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): + dpt.add(ar1, ar2, out=ar2) + assert ( + dpt.asnumpy(ar2) == np.full(ar2.shape, 2, dtype=ar2.dtype) + ).all() + + ar3 = dpt.ones(sz, dtype=op1_dtype)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype)[::2] + dpt.add(ar3, ar4, out=ar4) + assert ( + dpt.asnumpy(ar4) == np.full(ar4.shape, 2, dtype=ar4.dtype) + ).all() + else: + with pytest.raises(TypeError): + dpt.add(ar1, ar2, out=ar2) def test_add_inplace_broadcasting(): @@ -403,6 +421,12 @@ def test_add_inplace_broadcasting(): m += v assert (dpt.asnumpy(m) == np.arange(1, 6, dtype="i4")[np.newaxis, :]).all() + # check case where second arg is out + dpt.add(v, m, out=m) + assert ( + dpt.asnumpy(m) == np.arange(10, dtype="i4")[np.newaxis, 1:10:2] + ).all() + def test_add_inplace_errors(): get_queue_or_skip() @@ -441,7 +465,7 @@ def test_add_inplace_errors(): ar1 += ar2 -def test_add_inplace_overlap(): +def 
test_add_inplace_same_tensors(): get_queue_or_skip() ar1 = dpt.ones(10, dtype="i4") @@ -451,7 +475,13 @@ def test_add_inplace_overlap(): ar1 = dpt.ones(10, dtype="i4") ar2 = dpt.ones(10, dtype="i4") dpt.add(ar1, ar2, out=ar1) + # all ar1 vals should be 2 assert (dpt.asnumpy(ar1) == np.full(ar1.shape, 2, dtype="i4")).all() dpt.add(ar2, ar1, out=ar2) + # all ar2 vals should be 3 assert (dpt.asnumpy(ar2) == np.full(ar2.shape, 3, dtype="i4")).all() + + dpt.add(ar1, ar2, out=ar2) + # all ar2 vals should be 5 + assert (dpt.asnumpy(ar2) == np.full(ar2.shape, 5, dtype="i4")).all() From 7eaab6dfc8f87bfff7809b71cf617c3f5862ad05 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 16 Aug 2023 21:49:12 -0700 Subject: [PATCH 3/8] Broadcasting made conditional in binary functions where memory overlap is possible - Broadcasting can change the values of strides without changing array shape --- dpctl/tensor/_elementwise_common.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 8ecd5f585e..d5485b2caf 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -510,7 +510,8 @@ def __call__(self, o1, o2, out=None, order="K"): o2, dtype=o2_dtype, sycl_queue=exec_q ) if buf2_dt is None: - src2 = dpt.broadcast_to(src2, res_shape) + if src2.shape != res_shape: + src2 = dpt.broadcast_to(src2, res_shape) ht_, _ = self.binary_inplace_fn_( lhs=o1, rhs=src2, sycl_queue=exec_q ) @@ -581,9 +582,10 @@ def __call__(self, o1, o2, out=None, order="K"): sycl_queue=exec_q, order=order, ) - - src1 = dpt.broadcast_to(src1, res_shape) - src2 = dpt.broadcast_to(src2, res_shape) + if src1.shape != res_shape: + src1 = dpt.broadcast_to(src1, res_shape) + if src2.shape != res_shape: + src2 = dpt.broadcast_to(src2, res_shape) ht_binary_ev, binary_ev = self.binary_fn_( src1=src1, src2=src2, dst=out, sycl_queue=exec_q ) @@ -628,8 +630,8 @@ def __call__(self, 
o1, o2, out=None, order="K"): f"Output array of type {res_dt} is needed," f"got {out.dtype}" ) - - src1 = dpt.broadcast_to(src1, res_shape) + if src1.shape != res_shape: + src1 = dpt.broadcast_to(src1, res_shape) buf2 = dpt.broadcast_to(buf2, res_shape) ht_binary_ev, binary_ev = self.binary_fn_( src1=src1, @@ -676,7 +678,8 @@ def __call__(self, o1, o2, out=None, order="K"): ) buf1 = dpt.broadcast_to(buf1, res_shape) - src2 = dpt.broadcast_to(src2, res_shape) + if src2.shape != res_shape: + src2 = dpt.broadcast_to(src2, res_shape) ht_binary_ev, binary_ev = self.binary_fn_( src1=buf1, src2=src2, From 28d699c4856a9b7606c01def9d092ee4864af0c4 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 17 Aug 2023 16:29:25 -0500 Subject: [PATCH 4/8] Changed exception types raised Use ExecutionPlacementError for CFD violations. Use ValueError if types of input are as expected, but values are not as expected. --- dpctl/tensor/_elementwise_common.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index d5485b2caf..acc216a696 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -78,8 +78,8 @@ def __call__(self, x, out=None, order="K"): ) if out.shape != x.shape: - raise TypeError( - "The shape of input and output arrays are inconsistent." + raise ValueError( + "The shape of input and output arrays are inconsistent. " f"Expected output shape is {x.shape}, got {out.shape}" ) @@ -103,7 +103,7 @@ def __call__(self, x, out=None, order="K"): dpctl.utils.get_execution_queue((x.sycl_queue, out.sycl_queue)) is None ): - raise TypeError( + raise ExecutionPlacementError( "Input and output allocation queues are not compatible" ) @@ -471,8 +471,8 @@ def __call__(self, o1, o2, out=None, order="K"): ) if out.shape != res_shape: - raise TypeError( - "The shape of input and output arrays are inconsistent." 
+ raise ValueError( + "The shape of input and output arrays are inconsistent. " f"Expected output shape is {o1_shape}, got {out.shape}" ) @@ -486,7 +486,7 @@ def __call__(self, o1, o2, out=None, order="K"): dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None ): - raise TypeError( + raise ExecutionPlacementError( "Input and output allocation queues are not compatible" ) From 1d10f86ab3f8ce94b99c502c17a3ba088dd8da82 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 17 Aug 2023 16:30:26 -0500 Subject: [PATCH 5/8] Adding tests to improve coverage Removed tests expecting error raised in case of overlapping inputs. Added tests guided by coverage report. --- dpctl/tests/elementwise/test_add.py | 64 +++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/dpctl/tests/elementwise/test_add.py b/dpctl/tests/elementwise/test_add.py index cd4e099064..891dda5252 100644 --- a/dpctl/tests/elementwise/test_add.py +++ b/dpctl/tests/elementwise/test_add.py @@ -175,7 +175,7 @@ def test_add_broadcasting_new_shape(): ).all() r3 = dpt.empty_like(ar1) - with pytest.raises(TypeError): + with pytest.raises(ValueError): dpt.add(ar1, ar2, out=r3) ar3 = dpt.ones((6, 1), dtype="i4") @@ -273,7 +273,7 @@ def test_add_errors(): ar2 = dpt.ones_like(ar1, sycl_queue=gpu_queue) y = dpt.empty_like(ar1, sycl_queue=cpu_queue) assert_raises_regex( - TypeError, + ExecutionPlacementError, "Input and output allocation queues are not compatible", dpt.add, ar1, @@ -285,7 +285,7 @@ def test_add_errors(): ar2 = dpt.ones_like(ar1, dtype="int32") y = dpt.empty(3) assert_raises_regex( - TypeError, + ValueError, "The shape of input and output arrays are inconsistent", dpt.add, ar1, @@ -293,19 +293,6 @@ def test_add_errors(): y, ) - ar1 = dpt.ones(2, dtype="float32") - ar2 = dpt.ones_like(ar1, dtype="int32") - # identical view but a different object - y = ar1[:] - assert_raises_regex( - TypeError, - "Input and output arrays have memory overlap", - dpt.add, - 
ar1, - ar2, - y, - ) - ar1 = np.ones(2, dtype="float32") ar2 = np.ones_like(ar1, dtype="int32") assert_raises_regex( @@ -485,3 +472,48 @@ def test_add_inplace_same_tensors(): dpt.add(ar1, ar2, out=ar2) # all ar2 vals should be 5 assert (dpt.asnumpy(ar2) == np.full(ar2.shape, 5, dtype="i4")).all() + + +def test_add_str_repr(): + add_s = str(dpt.add) + assert isinstance(add_s, str) + assert "add" in add_s + + add_r = repr(dpt.add) + assert isinstance(add_r, str) + assert "add" in add_r + + +def test_add_cfd(): + q1 = get_queue_or_skip() + q2 = dpctl.SyclQueue(q1.sycl_device) + + x1 = dpt.ones(10, sycl_queue=q1) + x2 = dpt.ones(10, sycl_queue=q2) + with pytest.raises(ExecutionPlacementError): + dpt.add(x1, x2) + + with pytest.raises(ExecutionPlacementError): + dpt.add(x1, x1, out=x2) + + +def test_add_out_type_check(): + get_queue_or_skip() + + x1 = dpt.ones(10) + x2 = dpt.ones(10) + + out = range(10) + + with pytest.raises(TypeError): + dpt.add(x1, x2, out=out) + + +def test_add_out_need_temporary(): + get_queue_or_skip() + + x = dpt.ones(10, dtype="u4") + + dpt.add(x[:6], 1, out=x[-6:]) + + assert dpt.all(x[:-6] == 1) and dpt.all(x[-6:] == 2) From 0017a7d248c010fc0cd606e2c377fe1c87196e63 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 17 Aug 2023 19:43:23 -0500 Subject: [PATCH 6/8] Removed provably unreachable branches in _resolve_weak_types Since o1_dtype_kind_num > o2_dtype_kind_num, o1 cannot be a weak boolean type, since it has the lowest kind number in the hierarchy. 
--- dpctl/tensor/_elementwise_common.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index acc216a696..002b0ef5ec 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -301,8 +301,6 @@ def _resolve_weak_types(o1_dtype, o2_dtype, dev): o1_kind_num = _weak_type_num_kind(o1_dtype) o2_kind_num = _strong_dtype_num_kind(o2_dtype) if o1_kind_num > o2_kind_num: - if isinstance(o1_dtype, WeakBooleanType): - return dpt.bool, o2_dtype if isinstance(o1_dtype, WeakIntegralType): return dpt.int64, o2_dtype if isinstance(o1_dtype, WeakComplexType): @@ -322,8 +320,6 @@ def _resolve_weak_types(o1_dtype, o2_dtype, dev): o1_kind_num = _strong_dtype_num_kind(o1_dtype) o2_kind_num = _weak_type_num_kind(o2_dtype) if o2_kind_num > o1_kind_num: - if isinstance(o2_dtype, WeakBooleanType): - return o1_dtype, dpt.bool if isinstance(o2_dtype, WeakIntegralType): return o1_dtype, dpt.int64 if isinstance(o2_dtype, WeakComplexType): From 3371abd4b0184064433f1d82ce47f05097a8c66c Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 17 Aug 2023 18:15:39 -0700 Subject: [PATCH 7/8] All in-place operators now use call operator of BinaryElementwiseFunc --- dpctl/tensor/_usmarray.pyx | 54 +++++++------------------------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index ebee97b5f2..a886e0eae7 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -1289,25 +1289,13 @@ cdef class usm_ndarray: return dpctl.tensor.add(self, other, out=self) def __iand__(self, other): - res = self.__and__(other) - if res is NotImplemented: - return res - self.__setitem__(Ellipsis, res) - return self + return dpctl.tensor.bitwise_and(self, other, out=self) def __ifloordiv__(self, other): - res = self.__floordiv__(other) - if res is NotImplemented: - return res - self.__setitem__(Ellipsis, res) - return 
self + return dpctl.tensor.floor_divide(self, other, out=self) def __ilshift__(self, other): - res = self.__lshift__(other) - if res is NotImplemented: - return res - self.__setitem__(Ellipsis, res) - return self + return dpctl.tensor.bitwise_left_shift(self, other, out=self) def __imatmul__(self, other): res = self.__matmul__(other) @@ -1317,52 +1305,28 @@ cdef class usm_ndarray: return self def __imod__(self, other): - res = self.__mod__(other) - if res is NotImplemented: - return res - self.__setitem__(Ellipsis, res) - return self + return dpctl.tensor.remainder(self, other, out=self) def __imul__(self, other): return dpctl.tensor.multiply(self, other, out=self) def __ior__(self, other): - res = self.__or__(other) - if res is NotImplemented: - return res - self.__setitem__(Ellipsis, res) - return self + return dpctl.tensor.bitwise_or(self, other, out=self) def __ipow__(self, other): - res = self.__pow__(other, None) - if res is NotImplemented: - return res - self.__setitem__(Ellipsis, res) - return self + return dpctl.tensor.pow(self, other, out=self) def __irshift__(self, other): - res = self.__rshift__(other) - if res is NotImplemented: - return res - self.__setitem__(Ellipsis, res) - return self + return dpctl.tensor.bitwise_right_shift(self, other, out=self) def __isub__(self, other): return dpctl.tensor.subtract(self, other, out=self) def __itruediv__(self, other): - res = self.__truediv__(other) - if res is NotImplemented: - return res - self.__setitem__(Ellipsis, res) - return self + return dpctl.tensor.divide(self, other, out=self) def __ixor__(self, other): - res = self.__xor__(other) - if res is NotImplemented: - return res - self.__setitem__(Ellipsis, res) - return self + return dpctl.tensor.bitwise_xor(self, other, out=self) def __str__(self): return usm_ndarray_str(self) From 2a749f6e508917f1c6b0160a87648b0f4891f8de Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 18 Aug 2023 11:14:14 -0700 Subject: [PATCH 8/8] Removed some redundant and 
obsolete tests - Removed from test_floor_ceil_trunc, test_hyperbolic, test_trigonometric, and test_logaddexp - These tests would fail on GPU but never run on CPU, and therefore were not impacting the coverage - These tests focused on aspects of the BinaryElementwiseFunc class rather than the behavior of the operator --- .../elementwise/test_floor_ceil_trunc.py | 40 ------- dpctl/tests/elementwise/test_hyperbolic.py | 40 ------- dpctl/tests/elementwise/test_logaddexp.py | 106 ------------------ dpctl/tests/elementwise/test_trigonometric.py | 40 ------- 4 files changed, 226 deletions(-) diff --git a/dpctl/tests/elementwise/test_floor_ceil_trunc.py b/dpctl/tests/elementwise/test_floor_ceil_trunc.py index e79e049893..211c423ee6 100644 --- a/dpctl/tests/elementwise/test_floor_ceil_trunc.py +++ b/dpctl/tests/elementwise/test_floor_ceil_trunc.py @@ -24,7 +24,6 @@ assert_raises_regex, ) -import dpctl import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported @@ -92,45 +91,6 @@ def test_floor_ceil_trunc_order(np_call, dpt_call, dtype): assert_allclose(dpt.asnumpy(Y), expected_Y) -@pytest.mark.parametrize("dpt_call", [dpt.floor, dpt.ceil, dpt.trunc]) -def test_floor_ceil_trunc_errors(dpt_call): - get_queue_or_skip() - try: - gpu_queue = dpctl.SyclQueue("gpu") - except dpctl.SyclQueueCreationError: - pytest.skip("SyclQueue('gpu') failed, skipping") - try: - cpu_queue = dpctl.SyclQueue("cpu") - except dpctl.SyclQueueCreationError: - pytest.skip("SyclQueue('cpu') failed, skipping") - - x = dpt.zeros(2, sycl_queue=gpu_queue) - y = dpt.empty_like(x, sycl_queue=cpu_queue) - assert_raises_regex( - TypeError, - "Input and output allocation queues are not compatible", - dpt_call, - x, - y, - ) - - x = dpt.zeros(2) - y = dpt.empty(3) - assert_raises_regex( - TypeError, - "The shape of input and output arrays are inconsistent", - dpt_call, - x, - y, - ) - - x = dpt.zeros(2, dtype="float32") - y = np.empty_like(x) - assert_raises_regex( - 
TypeError, "output array must be of usm_ndarray type", dpt_call, x, y - ) - - @pytest.mark.parametrize("dpt_call", [dpt.floor, dpt.ceil, dpt.trunc]) @pytest.mark.parametrize("dtype", _real_value_dtypes) def test_floor_ceil_trunc_error_dtype(dpt_call, dtype): diff --git a/dpctl/tests/elementwise/test_hyperbolic.py b/dpctl/tests/elementwise/test_hyperbolic.py index 2a7c3a6a53..401249443e 100644 --- a/dpctl/tests/elementwise/test_hyperbolic.py +++ b/dpctl/tests/elementwise/test_hyperbolic.py @@ -20,7 +20,6 @@ import pytest from numpy.testing import assert_allclose, assert_raises_regex -import dpctl import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported @@ -178,45 +177,6 @@ def test_hyper_order(np_call, dpt_call, dtype): assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) -@pytest.mark.parametrize("callable", _dpt_funcs) -def test_hyper_errors(callable): - get_queue_or_skip() - try: - gpu_queue = dpctl.SyclQueue("gpu") - except dpctl.SyclQueueCreationError: - pytest.skip("SyclQueue('gpu') failed, skipping") - try: - cpu_queue = dpctl.SyclQueue("cpu") - except dpctl.SyclQueueCreationError: - pytest.skip("SyclQueue('cpu') failed, skipping") - - x = dpt.ones(2, sycl_queue=gpu_queue) - y = dpt.empty_like(x, sycl_queue=cpu_queue) - assert_raises_regex( - TypeError, - "Input and output allocation queues are not compatible", - callable, - x, - y, - ) - - x = dpt.ones(2) - y = dpt.empty(3) - assert_raises_regex( - TypeError, - "The shape of input and output arrays are inconsistent", - callable, - x, - y, - ) - - x = dpt.ones(2, dtype="float32") - y = np.empty_like(x) - assert_raises_regex( - TypeError, "output array must be of usm_ndarray type", callable, x, y - ) - - @pytest.mark.parametrize("callable", _dpt_funcs) @pytest.mark.parametrize("dtype", _all_dtypes) def test_hyper_error_dtype(callable, dtype): diff --git a/dpctl/tests/elementwise/test_logaddexp.py b/dpctl/tests/elementwise/test_logaddexp.py index 
a693389354..6e894e0843 100644 --- a/dpctl/tests/elementwise/test_logaddexp.py +++ b/dpctl/tests/elementwise/test_logaddexp.py @@ -23,7 +23,6 @@ import dpctl import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported -from dpctl.utils import ExecutionPlacementError from .utils import _compare_dtypes, _no_complex_dtypes, _usm_types @@ -178,111 +177,6 @@ def test_logaddexp_python_scalar(arr_dt): assert isinstance(R, dpt.usm_ndarray) -class MockArray: - def __init__(self, arr): - self.data_ = arr - - @property - def __sycl_usm_array_interface__(self): - return self.data_.__sycl_usm_array_interface__ - - -def test_logaddexp_mock_array(): - get_queue_or_skip() - a = dpt.arange(10) - b = dpt.ones(10) - c = MockArray(b) - r = dpt.logaddexp(a, c) - assert isinstance(r, dpt.usm_ndarray) - - -def test_logaddexp_canary_mock_array(): - get_queue_or_skip() - a = dpt.arange(10) - - class Canary: - def __init__(self): - pass - - @property - def __sycl_usm_array_interface__(self): - return None - - c = Canary() - with pytest.raises(ValueError): - dpt.logaddexp(a, c) - - -def test_logaddexp_errors(): - get_queue_or_skip() - try: - gpu_queue = dpctl.SyclQueue("gpu") - except dpctl.SyclQueueCreationError: - pytest.skip("SyclQueue('gpu') failed, skipping") - try: - cpu_queue = dpctl.SyclQueue("cpu") - except dpctl.SyclQueueCreationError: - pytest.skip("SyclQueue('cpu') failed, skipping") - - ar1 = dpt.ones(2, dtype="float32", sycl_queue=gpu_queue) - ar2 = dpt.ones_like(ar1, sycl_queue=gpu_queue) - y = dpt.empty_like(ar1, sycl_queue=cpu_queue) - assert_raises_regex( - TypeError, - "Input and output allocation queues are not compatible", - dpt.logaddexp, - ar1, - ar2, - y, - ) - - ar1 = dpt.ones(2, dtype="float32") - ar2 = dpt.ones_like(ar1, dtype="int32") - y = dpt.empty(3) - assert_raises_regex( - TypeError, - "The shape of input and output arrays are inconsistent", - dpt.logaddexp, - ar1, - ar2, - y, - ) - - ar1 = dpt.ones(2, 
dtype="float32") - ar2 = dpt.ones_like(ar1, dtype="int32") - y = ar1 - assert_raises_regex( - TypeError, - "Input and output arrays have memory overlap", - dpt.logaddexp, - ar1, - ar2, - y, - ) - - ar1 = np.ones(2, dtype="float32") - ar2 = np.ones_like(ar1, dtype="int32") - assert_raises_regex( - ExecutionPlacementError, - "Execution placement can not be unambiguously inferred.*", - dpt.logaddexp, - ar1, - ar2, - ) - - ar1 = dpt.ones(2, dtype="float32") - ar2 = dpt.ones_like(ar1, dtype="int32") - y = np.empty_like(ar1) - assert_raises_regex( - TypeError, - "output array must be of usm_ndarray type", - dpt.logaddexp, - ar1, - ar2, - y, - ) - - @pytest.mark.parametrize("dtype", _no_complex_dtypes) def test_logaddexp_dtype_error( dtype, diff --git a/dpctl/tests/elementwise/test_trigonometric.py b/dpctl/tests/elementwise/test_trigonometric.py index 42c8453968..e947d9e469 100644 --- a/dpctl/tests/elementwise/test_trigonometric.py +++ b/dpctl/tests/elementwise/test_trigonometric.py @@ -20,7 +20,6 @@ import pytest from numpy.testing import assert_allclose, assert_raises_regex -import dpctl import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported @@ -175,45 +174,6 @@ def test_trig_order(np_call, dpt_call, dtype): assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) -@pytest.mark.parametrize("callable", _dpt_funcs) -def test_trig_errors(callable): - get_queue_or_skip() - try: - gpu_queue = dpctl.SyclQueue("gpu") - except dpctl.SyclQueueCreationError: - pytest.skip("SyclQueue('gpu') failed, skipping") - try: - cpu_queue = dpctl.SyclQueue("cpu") - except dpctl.SyclQueueCreationError: - pytest.skip("SyclQueue('cpu') failed, skipping") - - x = dpt.zeros(2, sycl_queue=gpu_queue) - y = dpt.empty_like(x, sycl_queue=cpu_queue) - assert_raises_regex( - TypeError, - "Input and output allocation queues are not compatible", - callable, - x, - y, - ) - - x = dpt.zeros(2) - y = dpt.empty(3) - assert_raises_regex( - TypeError, - 
"The shape of input and output arrays are inconsistent", - callable, - x, - y, - ) - - x = dpt.zeros(2, dtype="float32") - y = np.empty_like(x) - assert_raises_regex( - TypeError, "output array must be of usm_ndarray type", callable, x, y - ) - - @pytest.mark.parametrize("callable", _dpt_funcs) @pytest.mark.parametrize("dtype", _all_dtypes) def test_trig_error_dtype(callable, dtype):