From f77f7a4d9bdb5aa75d1e267703d818681e99510d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 9 Aug 2023 10:10:27 -0500 Subject: [PATCH 1/8] Support stream keyword in usm_ndarray.to_device per array API --- dpctl/tensor/_usmarray.pyx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 20a226bd3e..59ffb3238f 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -816,7 +816,7 @@ cdef class usm_ndarray: return _take_multi_index(res, adv_ind, adv_ind_start_p) - def to_device(self, target): + def to_device(self, target, stream=None): """ to_device(target_device) Transfers this array to specified target device. @@ -856,6 +856,14 @@ cdef class usm_ndarray: cdef c_dpctl.DPCTLSyclQueueRef QRef = NULL cdef c_dpmem._Memory arr_buf d = Device.create_device(target) + + if (stream is None or type(stream) is not dpctl.SyclQueue or + stream == self.sycl_queue): + pass + else: + ev = self.sycl_queue.submit_barrier() + stream.submit_barrier(dependent_events=[ev]) + if (d.sycl_context == self.sycl_context): arr_buf = self.usm_data QRef = ( d.sycl_queue).get_queue_ref() From c368034de3c0b0aad022191320270ee59d160cda Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 9 Aug 2023 10:10:53 -0500 Subject: [PATCH 2/8] Fix for gh-1330 When source array must be broadcast, non-zero strides corresponding to unit size dimensions must be zeroed out, otherwise if such dimension is broadcasted, constructor would expected a bigger buffer than is really necessary. --- dpctl/tensor/_copy_utils.py | 5 ++++- dpctl/tests/test_usm_ndarray_ctor.py | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index 565a11dec7..701251da48 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -279,7 +279,10 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): common_shape = common_shape[ones_count:] if src.ndim < len(common_shape): - new_src_strides = (0,) * (len(common_shape) - src.ndim) + src.strides + pad_count = len(common_shape) - src.ndim + new_src_strides = (0,) * pad_count + tuple( + s if d > 1 else 0 for s, d in zip(src.strides, src.shape) + ) src_same_shape = dpt.usm_ndarray( common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides ) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 50d8e2479a..af35e5af10 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1012,6 +1012,15 @@ def test_setitem_same_dtype(dtype, src_usm_type, dst_usm_type): Zusm_empty[Ellipsis] = Zusm_3d[0, 0, 0:0] +def test_setitem_boradcasting(): + get_queue_or_skip() + dst = dpt.ones((2, 3, 4), dtype="u4") + src = dpt.zeros((3, 1), dtype=dst.dtype) + dst[...] = src + expected = np.zeros(dst.shape, dtype=dst.dtype) + assert np.array_equal(dpt.asnumpy(dst), expected) + + @pytest.mark.parametrize( "dtype", _all_dtypes, From ef5fe174792e33c55c38a2c21d18da6330fd17b2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 9 Aug 2023 12:53:17 -0500 Subject: [PATCH 3/8] Fixed grammar in exception message text --- dpctl/tensor/_indexing_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/_indexing_functions.py b/dpctl/tensor/_indexing_functions.py index d73a2e7952..188f3cd291 100644 --- a/dpctl/tensor/_indexing_functions.py +++ b/dpctl/tensor/_indexing_functions.py @@ -343,5 +343,5 @@ def nonzero(arr): "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}" ) if arr.ndim == 0: - raise ValueError("Array of positive rank is exepcted") + raise ValueError("Array of positive rank is expected") return _nonzero_impl(arr) From 53315a96e5f07a52d78fcfb40079c0ada41803bb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 9 Aug 2023 16:22:32 -0500 Subject: [PATCH 4/8] Fixed bug introduced in 5c1a961afee913e675c92a7ecc7d15ab308f61d8 This commit short-circuited broadcastability of shapes for zero-size rhs in __setitem__. Refix array API test failure without introducing this regression and reuse _manipulation_functions._broadcast_strides as suggested by @ndgrigorian --- dpctl/tensor/_copy_utils.py | 24 ++++++++++++++++++++---- dpctl/tensor/_manipulation_functions.py | 18 +----------------- dpctl/tensor/_usmarray.pyx | 2 -- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index 701251da48..23e566e67d 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -246,6 +246,23 @@ def _broadcast_shapes(sh1, sh2): ).shape +def _broadcast_strides(X_shape, X_strides, res_ndim): + """ + Broadcasts strides to match the given dimensions; + returns tuple type strides. + """ + out_strides = [0] * res_ndim + X_shape_len = len(X_shape) + str_dim = -X_shape_len + for i in range(X_shape_len): + shape_value = X_shape[i] + if not shape_value == 1: + out_strides[str_dim] = X_strides[i] + str_dim += 1 + + return tuple(out_strides) + + def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): if any( not isinstance(arg, dpt.usm_ndarray) @@ -268,7 +285,7 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): except ValueError as exc: raise ValueError("Shapes of two arrays are not compatible") from exc - if dst.size < src.size: + if dst.size < src.size and dst.size < np.prod(common_shape): raise ValueError("Destination is smaller ") if len(common_shape) > dst.ndim: @@ -279,9 +296,8 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): common_shape = common_shape[ones_count:] if src.ndim < len(common_shape): - pad_count = len(common_shape) - src.ndim - new_src_strides = (0,) * pad_count + tuple( - s if d > 1 else 0 for s, d in zip(src.strides, src.shape) + new_src_strides = _broadcast_strides( + src.shape, src.strides, len(common_shape) ) src_same_shape = dpt.usm_ndarray( common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py index 26c1ab60cf..9406e386af 100644 --- a/dpctl/tensor/_manipulation_functions.py +++ b/dpctl/tensor/_manipulation_functions.py @@ -25,6 +25,7 @@ import dpctl.tensor._tensor_impl as ti import dpctl.utils as dputils +from ._copy_utils import _broadcast_strides from ._type_utils import _to_device_supported_dtype __doc__ = ( @@ -120,23 +121,6 @@ def __repr__(self): return self._finfo.__repr__() -def _broadcast_strides(X_shape, X_strides, res_ndim): - """ - Broadcasts strides to match the given dimensions; - returns tuple type strides. - """ - out_strides = [0] * res_ndim - X_shape_len = len(X_shape) - str_dim = -X_shape_len - for i in range(X_shape_len): - shape_value = X_shape[i] - if not shape_value == 1: - out_strides[str_dim] = X_strides[i] - str_dim += 1 - - return tuple(out_strides) - - def _broadcast_shape_impl(shapes): if len(set(shapes)) == 1: return shapes[0] diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 59ffb3238f..98986d84d6 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -1175,8 +1175,6 @@ cdef class usm_ndarray: if adv_ind_start_p < 0: # basic slicing if isinstance(rhs, usm_ndarray): - if Xv.size == 0: - return _copy_from_usm_ndarray_to_usm_ndarray(Xv, rhs) else: if hasattr(rhs, "__sycl_usm_array_interface__"): From 8b03642adb6f9dcd76c3a7fd01275c15057d0318 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 9 Aug 2023 16:33:03 -0500 Subject: [PATCH 5/8] Added tests for examples fixed in prev. commit --- dpctl/tests/test_usm_ndarray_ctor.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index af35e5af10..03f426b9d4 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1012,7 +1012,7 @@ def test_setitem_same_dtype(dtype, src_usm_type, dst_usm_type): Zusm_empty[Ellipsis] = Zusm_3d[0, 0, 0:0] -def test_setitem_boradcasting(): +def test_setitem_broadcasting(): get_queue_or_skip() dst = dpt.ones((2, 3, 4), dtype="u4") src = dpt.zeros((3, 1), dtype=dst.dtype) @@ -1021,6 +1021,24 @@ def test_setitem_boradcasting(): assert np.array_equal(dpt.asnumpy(dst), expected) +def test_setitem_broadcasting_empty_dst_validation(): + "Broadcasting rules apply, except exception" + get_queue_or_skip() + dst = dpt.ones((2, 0, 5, 4), dtype="i8") + src = dpt.ones((2, 0, 3, 4), dtype="i8") + with pytest.raises(ValueError): + dst[...] = src + + +def test_setitem_broadcasting_empty_dst_edge_case(): + """RHS is shunken to empty array by + broadasting rule, hence no exception""" + get_queue_or_skip() + dst = dpt.ones(1, dtype="i8")[0:0] + src = dpt.ones(tuple(), dtype="i8") + dst[...] = src + + @pytest.mark.parametrize( "dtype", _all_dtypes, From 346200eb8c4fb1ce0c12918391e52e998f2cde6b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 10 Aug 2023 07:13:50 -0500 Subject: [PATCH 6/8] Fixed example for @antonwolfy, added tests based on it Also added comment to explain the logic of the code. --- dpctl/tensor/_copy_utils.py | 22 ++++++++++++++++++++-- dpctl/tests/test_usm_ndarray_ctor.py | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index 23e566e67d..a1b154f4d1 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -302,9 +302,27 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): src_same_shape = dpt.usm_ndarray( common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides ) + elif src.ndim == len(common_shape): + new_src_strides = _broadcast_strides( + src.shape, src.strides, len(common_shape) + ) + src_same_shape = dpt.usm_ndarray( + common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides + ) else: - src_same_shape = src - src_same_shape.shape = common_shape + # since broadcasting succeeded, src.ndim is greater because of + # leading sequence of ones, so we trim it + n = len(common_shape) + new_src_strides = _broadcast_strides( + src.shape[-n:], src.strides[-n:], n + ) + src_same_shape = dpt.usm_ndarray( + common_shape, + dtype=src.dtype, + buffer=src.usm_data, + strides=new_src_strides, + offset=src._element_offset, + ) _copy_same_shape(dst, src_same_shape) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 03f426b9d4..01135cb7c3 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1039,6 +1039,26 @@ def test_setitem_broadcasting_empty_dst_edge_case(): dst[...] = src +def test_setitem_broadcasting_src_ndim_equal_dst_ndim(): + get_queue_or_skip() + dst = dpt.ones((2, 3, 4), dtype="i4") + src = dpt.zeros((2, 1, 4), dtype="i4") + dst[...] = src + + expected = np.zeros(dst.shape, dtype=dst.dtype) + assert np.array_equal(dpt.asnumpy(dst), expected) + + +def test_setitem_broadcasting_src_ndim_greater_than_dst_ndim(): + get_queue_or_skip() + dst = dpt.ones((2, 3, 4), dtype="i4") + src = dpt.zeros((1, 2, 1, 4), dtype="i4") + dst[...] = src + + expected = np.zeros(dst.shape, dtype=dst.dtype) + assert np.array_equal(dpt.asnumpy(dst), expected) + + @pytest.mark.parametrize( "dtype", _all_dtypes, From f5b96b760dfee784013fe7c8e03201ad8eb0fc04 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 10 Aug 2023 21:30:29 -0500 Subject: [PATCH 7/8] Fixes gh-1334 A non-empty array which is effectively 1D (only one dimension has size greater than one) should be marked as both C- and F- contiguous. ``` In [1]: import dpctl.tensor as dpt In [2]: a = dpt.ones((2, 3)) ...: dpt.reshape(a, (1, 6, 1)).flags Out[2]: C_CONTIGUOUS : True F_CONTIGUOUS : True WRITABLE : True In [3]: a = dpt.ones((2, 3), order='F') ...: dpt.reshape(a, (1, 6, 1), order='F').flags Out[3]: C_CONTIGUOUS : True F_CONTIGUOUS : True WRITABLE : True In [4]: a = dpt.ones((2, 3, 4)) ...: dpt.sum(a, axis=(1, 2), keepdims=True).flags Out[4]: C_CONTIGUOUS : True F_CONTIGUOUS : True WRITABLE : True ``` --- dpctl/tensor/_stride_utils.pxi | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/dpctl/tensor/_stride_utils.pxi b/dpctl/tensor/_stride_utils.pxi index ea59ec5402..bc7349a5e6 100644 --- a/dpctl/tensor/_stride_utils.pxi +++ b/dpctl/tensor/_stride_utils.pxi @@ -64,8 +64,7 @@ cdef int _from_input_shape_strides( cdef int j cdef bint all_incr = 1 cdef bint all_decr = 1 - cdef bint all_incr_modified = 0 - cdef bint all_decr_modified = 0 + cdef bint strides_inspected = 0 cdef Py_ssize_t elem_count = 1 cdef Py_ssize_t min_shift = 0 cdef Py_ssize_t max_shift = 0 @@ -167,15 +166,14 @@ cdef int _from_input_shape_strides( while (j < nd and shape_arr[j] == 1): j = j + 1 if j < nd: + strides_inspected = 1 if all_incr: - all_incr_modified = 1 all_incr = ( (strides_arr[i] > 0) and (strides_arr[j] > 0) and (strides_arr[i] <= strides_arr[j]) ) if all_decr: - all_decr_modified = 1 all_decr = ( (strides_arr[i] > 0) and (strides_arr[j] > 0) and @@ -183,11 +181,18 @@ cdef int _from_input_shape_strides( ) i = j else: + if not strides_inspected: + # all dimensions have size 1 except + # dimension 'i'. Array is both C and F + # contiguous + strides_inspected = 1 + all_incr = (strides_arr[i] == 1) + all_decr = all_incr break # should only set contig flags on actually obtained # values, rather than default values - all_incr = all_incr and all_incr_modified - all_decr = all_decr and all_decr_modified + all_incr = all_incr and strides_inspected + all_decr = all_decr and strides_inspected if all_incr and all_decr: contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS) elif all_incr: From 706d80f5ea26e9899d43db7a611a32b7b1546f9c Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 11 Aug 2023 07:52:11 -0500 Subject: [PATCH 8/8] Adds tests based on examples in issue gh-1334 --- dpctl/tests/test_usm_ndarray_ctor.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 01135cb7c3..4ab28db479 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -109,6 +109,25 @@ def test_usm_ndarray_flags(): x.flags["C"] = False +def test_usm_ndarray_flags_bug_gh_1334(): + get_queue_or_skip() + a = dpt.ones((2, 3), dtype="u4") + r = dpt.reshape(a, (1, 6, 1)) + assert r.flags["C"] and r.flags["F"] + + a = dpt.ones((2, 3), dtype="u4", order="F") + r = dpt.reshape(a, (1, 6, 1), order="F") + assert r.flags["C"] and r.flags["F"] + + a = dpt.ones((2, 3, 4), dtype="i8") + r = dpt.sum(a, axis=(1, 2), keepdims=True) + assert r.flags["C"] and r.flags["F"] + + a = dpt.ones((2, 1), dtype="?") + r = a[:, 1::-1] + assert r.flags["F"] and r.flags["C"] + + @pytest.mark.parametrize( "dtype", [