From c5fe3146cc3fcdd153273d70458a397593c57ce0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 2 Aug 2023 16:17:01 -0500 Subject: [PATCH 01/26] Changed behavior of __array_namespace__ usm_ndarray constructor's array_namespace argument's default value of None is now interpreted to mean that usm_ndarray.__array_namespace__ returns dpctl.tensor, rather than None. This directly affects how Python operator special methods of usm_ndarray behave. They would now look up corresponding attribute in dpctl.tensor namespace. --- dpctl/tensor/_usmarray.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 5b1bd5f6a3..1e8cf8d433 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -884,7 +884,7 @@ cdef class usm_ndarray: Returns array namespace, member functions of which implement data API. """ - return self.array_namespace_ + return self.array_namespace_ if self.array_namespace_ is not None else dpctl.tensor def __bool__(self): if self.size == 1: From 4555bf1a3acf25c40f9c38137e4d3304b7724489 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 2 Aug 2023 16:22:19 -0500 Subject: [PATCH 02/26] Corrected text of exception message --- dpctl/tensor/_usmarray.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 1e8cf8d433..1667c174e7 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -898,7 +898,7 @@ cdef class usm_ndarray: raise ValueError( "The truth value of an array with more than one element is " - "ambiguous. Use a.any() or a.all()" + "ambiguous. Use dpctl.tensor.any() or dpctl.tensor.all()" ) def __float__(self): From 964db08258b43b3dafb182f45555c5ee21adfb86 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 2 Aug 2023 16:22:45 -0500 Subject: [PATCH 03/26] Corrected operator true_divide with divide --- dpctl/tensor/_usmarray.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 1667c174e7..0dc4b3395f 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -1202,9 +1202,9 @@ cdef class usm_ndarray: def __truediv__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "true_divide", other) + return _dispatch_binary_elementwise(first, "divide", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "true_divide", other) + return _dispatch_binary_elementwise2(first, "divide", other) return NotImplemented def __xor__(first, other): @@ -1249,7 +1249,7 @@ cdef class usm_ndarray: return _dispatch_binary_elementwise2(other, "subtract", self) def __rtruediv__(self, other): - return _dispatch_binary_elementwise2(other, "true_divide", self) + return _dispatch_binary_elementwise2(other, "divide", self) def __rxor__(self, other): return _dispatch_binary_elementwise2(other, "logical_xor", self) From 78aa99b4598c0164c3d6fc5421d113e7390362ec Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 2 Aug 2023 19:48:44 -0500 Subject: [PATCH 04/26] Fixed test per change in dpctl implementation --- dpctl/tests/test_usm_ndarray_operators.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpctl/tests/test_usm_ndarray_operators.py b/dpctl/tests/test_usm_ndarray_operators.py index 92dc38bd47..def22562c0 100644 --- a/dpctl/tests/test_usm_ndarray_operators.py +++ b/dpctl/tests/test_usm_ndarray_operators.py @@ -47,7 +47,7 @@ def multiply(a, b): return b -@pytest.mark.parametrize("namespace", [None, Dummy()]) +@pytest.mark.parametrize("namespace", [dpt, Dummy()]) def test_fp_ops(namespace): try: X = dpt.ones(1) @@ -81,7 +81,7 @@ def test_fp_ops(namespace): X.__ifloordiv__(1.0) -@pytest.mark.parametrize("namespace", [None, Dummy()]) +@pytest.mark.parametrize("namespace", [dpt, Dummy()]) def test_int_ops(namespace): try: X = dpt.usm_ndarray(1, "i4") @@ -113,7 +113,7 @@ def test_int_ops(namespace): X.__ipow__(2) -@pytest.mark.parametrize("namespace", [None, Dummy()]) +@pytest.mark.parametrize("namespace", [dpt, Dummy()]) def test_mat_ops(namespace): try: M = dpt.eye(3, 3) From 51bf9e1283d9b5e553a4c283d87be50e5efc1ae0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 3 Aug 2023 09:20:53 -0500 Subject: [PATCH 05/26] Fixed _slice_len for vacuous slices ``` In [1]: dpctl.tensor._usmarray as ua ua._basic_slice_meta((0, slice(1, 0, None)), (4, 4), (4, 1), 0) ``` The issue was discovered by array API conformance tests. --- dpctl/tensor/_slicing.pxi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index 361dd906c3..259248b323 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -33,9 +33,13 @@ cdef Py_ssize_t _slice_len( if sl_start == sl_stop: return 0 if sl_step > 0: + if sl_start > sl_stop: + return 0 # 1 + argmax k such htat sl_start + sl_step*k < sl_stop return 1 + ((sl_stop - sl_start - 1) // sl_step) else: + if sl_start < sl_stop: + return 0 return 1 + ((sl_stop - sl_start + 1) // sl_step) @@ -236,6 +240,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): new_offset = new_offset + sl_start * strides[k] if sh_i == 0: is_empty = True + new_offset = offset k = k_new elif _is_boolean(ind_i): new_shape.append(1 if ind_i else 0) From f4a31bb4f63edbd07dd0603f695d5c9c23f7cb36 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 3 Aug 2023 11:56:46 -0500 Subject: [PATCH 06/26] Fixed issue discovered by array API tests ``` import dpctl.tensor as dpt x = dpt.empy((0,2), dtype='i4') x[Ellipsis, 1] returns empty 1d array ``` --- dpctl/tensor/_slicing.pxi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index 259248b323..4f45d57391 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -225,6 +225,9 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): k_new = k + ellipses_count new_shape.extend(shape[k:k_new]) new_strides.extend(strides[k:k_new]) + if any(dim == 0 for dim in shape[k:k_new]): + is_empty = True + new_offset = offset k = k_new elif ind_i is None: new_shape.append(1) From 05aa952de6acae3c088dfc44b82678eabea68d8d Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 3 Aug 2023 10:51:16 -0700 Subject: [PATCH 07/26] Corrected remaining operators in _usmarray.pyx - "power" corrected to "pow, "mod" corrected to "remainder" - logical functions corrected to bitwise --- dpctl/tensor/_usmarray.pyx | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 0dc4b3395f..3a04f0f3d5 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -957,9 +957,9 @@ cdef class usm_ndarray: def __and__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "logical_and", other) + return _dispatch_binary_elementwise(first, "bitwise_and", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "logical_and", other) + return _dispatch_binary_elementwise2(first, "bitwise_and", other) return NotImplemented def __dlpack__(self, stream=None): @@ -1037,9 +1037,9 @@ cdef class usm_ndarray: def __lshift__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "left_shift", other) + return _dispatch_binary_elementwise(first, "bitwise_left_shift", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "left_shift", other) + return _dispatch_binary_elementwise2(first, "bitwise_left_shift", other) return NotImplemented def __lt__(self, other): @@ -1056,9 +1056,9 @@ cdef class usm_ndarray: def __mod__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "mod", other) + return _dispatch_binary_elementwise(first, "remainder", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "mod", other) + return _dispatch_binary_elementwise2(first, "remainder", other) return NotImplemented def __mul__(first, other): @@ -1078,9 +1078,9 @@ cdef class usm_ndarray: def __or__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "logical_or", other) + return _dispatch_binary_elementwise(first, "bitwise_or", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "logical_or", other) + return _dispatch_binary_elementwise2(first, "bitwise_or", other) return NotImplemented def __pos__(self): @@ -1090,17 +1090,17 @@ cdef class usm_ndarray: "See comment in __add__" if mod is None: if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "power", other) + return _dispatch_binary_elementwise(first, "pow", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise(first, "power", other) + return _dispatch_binary_elementwise(first, "pow", other) return NotImplemented def __rshift__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "right_shift", other) + return _dispatch_binary_elementwise(first, "bitwise_right_shift", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "right_shift", other) + return _dispatch_binary_elementwise2(first, "bitwise_right_shift", other) return NotImplemented def __setitem__(self, key, rhs): @@ -1210,40 +1210,40 @@ cdef class usm_ndarray: def __xor__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "logical_xor", other) + return _dispatch_binary_elementwise(first, "bitwise_xor", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "logical_xor", other) + return _dispatch_binary_elementwise2(first, "bitwise_xor", other) return NotImplemented def __radd__(self, other): return _dispatch_binary_elementwise(self, "add", other) def __rand__(self, other): - return _dispatch_binary_elementwise(self, "logical_and", other) + return _dispatch_binary_elementwise(self, "bitwise_and", other) def __rfloordiv__(self, other): return _dispatch_binary_elementwise2(other, "floor_divide", self) def __rlshift__(self, other): - return _dispatch_binary_elementwise2(other, "left_shift", self) + return _dispatch_binary_elementwise2(other, "bitwise_left_shift", self) def __rmatmul__(self, other): return _dispatch_binary_elementwise2(other, "matmul", self) def __rmod__(self, other): - return _dispatch_binary_elementwise2(other, "mod", self) + return _dispatch_binary_elementwise2(other, "remainder", self) def __rmul__(self, other): return _dispatch_binary_elementwise(self, "multiply", other) def __ror__(self, other): - return _dispatch_binary_elementwise(self, "logical_or", other) + return _dispatch_binary_elementwise(self, "bitwise_or", other) def __rpow__(self, other): - return _dispatch_binary_elementwise2(other, "power", self) + return _dispatch_binary_elementwise2(other, "pow", self) def __rrshift__(self, other): - return _dispatch_binary_elementwise2(other, "right_shift", self) + return _dispatch_binary_elementwise2(other, "bitwise_right_shift", self) def __rsub__(self, other): return _dispatch_binary_elementwise2(other, "subtract", self) @@ -1252,7 +1252,7 @@ cdef class usm_ndarray: return _dispatch_binary_elementwise2(other, "divide", self) def __rxor__(self, other): - return _dispatch_binary_elementwise2(other, "logical_xor", self) + return _dispatch_binary_elementwise2(other, "bitwise_xor", self) def __iadd__(self, other): from ._elementwise_funcs import add From 25547e75e1d3604fbbfcb1abfd6745fa667a122e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 3 Aug 2023 14:53:34 -0500 Subject: [PATCH 08/26] Fixed bug in contiguity flag computation found by array-api-tests ``` import dpctl.tensor as dpt import dpctl.tensor._usmarray as ua x = dpt.asarray([[0, 0], [1, 0]], dtype='int8') key = (None, 0, slice(1, None, -1)) meta = ua._basic_slice_meta(key, x.shape, x.strides, x._element_offset) y = dpt.usm_ndarray.__new__(dpt.usm_ndarray, meta[0], dtype=x.dtype, strides=meta[1], buffer=x.usm_data, offset=meta[2]) print(y.flags) ``` Both 'C' and 'F' flags of the created array used to be set, now both are correctly unset. --- dpctl/tensor/_stride_utils.pxi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dpctl/tensor/_stride_utils.pxi b/dpctl/tensor/_stride_utils.pxi index 896c31e65a..ea59ec5402 100644 --- a/dpctl/tensor/_stride_utils.pxi +++ b/dpctl/tensor/_stride_utils.pxi @@ -64,6 +64,8 @@ cdef int _from_input_shape_strides( cdef int j cdef bint all_incr = 1 cdef bint all_decr = 1 + cdef bint all_incr_modified = 0 + cdef bint all_decr_modified = 0 cdef Py_ssize_t elem_count = 1 cdef Py_ssize_t min_shift = 0 cdef Py_ssize_t max_shift = 0 @@ -166,12 +168,14 @@ cdef int _from_input_shape_strides( j = j + 1 if j < nd: if all_incr: + all_incr_modified = 1 all_incr = ( (strides_arr[i] > 0) and (strides_arr[j] > 0) and (strides_arr[i] <= strides_arr[j]) ) if all_decr: + all_decr_modified = 1 all_decr = ( (strides_arr[i] > 0) and (strides_arr[j] > 0) and @@ -180,6 +184,10 @@ cdef int _from_input_shape_strides( i = j else: break + # should only set contig flags on actually obtained + # values, rather than default values + all_incr = all_incr and all_incr_modified + all_decr = all_decr and all_decr_modified if all_incr and all_decr: contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS) elif all_incr: From 22c95b6207ae12863192c52857004411c237aa8a Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 3 Aug 2023 13:37:39 -0700 Subject: [PATCH 09/26] Fixed flags test case for changes to contiguity flag computation - Incorrect test logic was being hidden by the bug --- dpctl/tests/test_usm_ndarray_ctor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 5772968d64..604ed9d87f 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -84,7 +84,7 @@ def test_usm_ndarray_flags(): assert f.forc assert f.fnc - f = dpt.usm_ndarray((5, 1, 1), dtype="i4", strides=(1, 0, 1)).flags + f = dpt.usm_ndarray((5, 0, 1), dtype="i4", strides=(1, 0, 1)).flags assert f.fc assert f.forc assert not dpt.usm_ndarray( From 4803f130ce8204c1e960e043c7f9773edf26e1c9 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 3 Aug 2023 14:57:59 -0500 Subject: [PATCH 10/26] unpacked chained method calls --- dpctl/tensor/_usmarray.pyx | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 3a04f0f3d5..65c18d7a37 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -889,7 +889,9 @@ cdef class usm_ndarray: def __bool__(self): if self.size == 1: mem_view = dpmem.as_usm_memory(self) - return mem_view.copy_to_host().view(self.dtype).__bool__() + host_buf = mem_view.copy_to_host() + view = host_buf.view(self.dtype) + return view.__bool__() if self.size == 0: raise ValueError( @@ -904,7 +906,9 @@ cdef class usm_ndarray: def __float__(self): if self.size == 1: mem_view = dpmem.as_usm_memory(self) - return mem_view.copy_to_host().view(self.dtype).__float__() + host_buf = mem_view.copy_to_host() + view = host_buf.view(self.dtype) + return view.__float__() raise ValueError( "only size-1 arrays can be converted to Python scalars" @@ -913,7 +917,9 @@ cdef class usm_ndarray: def __complex__(self): if self.size == 1: mem_view = dpmem.as_usm_memory(self) - return mem_view.copy_to_host().view(self.dtype).__complex__() + host_buf = mem_view.copy_to_host() + view = host_buf.view(self.dtype) + return view.__complex__() raise ValueError( "only size-1 arrays can be converted to Python scalars" @@ -922,7 +928,9 @@ cdef class usm_ndarray: def __int__(self): if self.size == 1: mem_view = dpmem.as_usm_memory(self) - return mem_view.copy_to_host().view(self.dtype).__int__() + host_buf = mem_view.copy_to_host() + view = host_buf.view(self.dtype) + return view.__int__() raise ValueError( "only size-1 arrays can be converted to Python scalars" From cc08b5d9e187d31d3797d256583a70b24b28db9f Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 5 Aug 2023 21:33:58 -0500 Subject: [PATCH 11/26] Fixed Cython warning --- dpctl/_sycl_queue.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd index 729adfc3cb..c906ada4d6 100644 --- a/dpctl/_sycl_queue.pxd +++ b/dpctl/_sycl_queue.pxd @@ -29,7 +29,7 @@ from ._sycl_event cimport SyclEvent from .program._program cimport SyclKernel -cdef void default_async_error_handler(int) nogil except * +cdef void default_async_error_handler(int) except * nogil cdef public api class _SyclQueue [ object Py_SyclQueueObject, type Py_SyclQueueType From 5c1a961afee913e675c92a7ecc7d15ab308f61d8 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 5 Aug 2023 21:34:52 -0500 Subject: [PATCH 12/26] Fixed array API test failure by adding validation --- dpctl/tensor/_copy_utils.py | 2 ++ dpctl/tensor/_usmarray.pyx | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index e759571790..c6cbd2a173 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -492,6 +492,8 @@ def _extract_impl(ary, ary_mask, axis=0): dst = dpt.empty( dst_shape, dtype=ary.dtype, usm_type=ary.usm_type, device=ary.device ) + if dst.size == 0: + return dst hev, _ = ti._extract( src=ary, cumsum=cumsum, diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 65c18d7a37..7ebf81657c 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -764,6 +764,8 @@ cdef class usm_ndarray: ind, (self).shape, ( self).strides, self.get_offset()) cdef usm_ndarray res + cdef int i = 0 + cdef bint matching = 1 if len(_meta) < 5: raise RuntimeError @@ -787,7 +789,20 @@ cdef class usm_ndarray: from ._copy_utils import _extract_impl, _nonzero_impl, _take_multi_index if len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool: - return _extract_impl(res, adv_ind[0], axis=adv_ind_start_p) + key_ = adv_ind[0] + adv_ind_end_p = key_.ndim + adv_ind_start_p + if adv_ind_end_p > res.ndim: + raise IndexError("too many indices for the array") + key_shape = key_.shape + arr_shape = res.shape[adv_ind_start_p:adv_ind_end_p] + for i in range(key_.ndim): + if matching: + if not key_shape[i] == arr_shape[i] and key_shape[i] > 0: + matching = 0 + if not matching: + raise IndexError("boolean index did not match indexed array in dimensions") + res = _extract_impl(res, key_, axis=adv_ind_start_p) + return res if any(ind.dtype == dpt_bool for ind in adv_ind): adv_ind_int = list() @@ -1152,6 +1167,8 @@ cdef class usm_ndarray: if adv_ind_start_p < 0: # basic slicing if isinstance(rhs, usm_ndarray): + if Xv.size == 0: + return _copy_from_usm_ndarray_to_usm_ndarray(Xv, rhs) else: if hasattr(rhs, "__sycl_usm_array_interface__"): From 07faf2b666bd6b3eaed49649bba4202140e8c741 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 6 Aug 2023 08:13:41 -0500 Subject: [PATCH 13/26] Use bitwise_invert for __invert__ --- dpctl/tensor/_usmarray.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 7ebf81657c..57dd1e47f0 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -1046,7 +1046,7 @@ cdef class usm_ndarray: return _dispatch_binary_elementwise(self, "greater", other) def __invert__(self): - return _dispatch_unary_elementwise(self, "invert") + return _dispatch_unary_elementwise(self, "bitwise_invert") def __le__(self, other): return _dispatch_binary_elementwise(self, "less_equal", other) From 3ddf51c0498f9d1139b1cb550f6b58de8eb76eb4 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 6 Aug 2023 08:14:21 -0500 Subject: [PATCH 14/26] Corrected order='K' support in astype Array API tests pointed out an error in implementation of order='K' in dpctl.tensor.astype. Moved _empty_like_orderK and fried from _type_utils to _copy_utils and used it to implement astype. Modified import statement in _elementwise_common where _empty_like* are used. --- dpctl/tensor/_copy_utils.py | 114 ++++++++++++++++++--- dpctl/tensor/_elementwise_common.py | 3 +- dpctl/tensor/_type_utils.py | 94 ----------------- dpctl/tests/elementwise/test_type_utils.py | 9 +- 4 files changed, 103 insertions(+), 117 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index c6cbd2a173..6ad4f6a154 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import builtins import operator import numpy as np @@ -361,6 +362,96 @@ def copy(usm_ary, order="K"): return R +def _empty_like_orderK(X, dt, usm_type=None, dev=None): + """Returns empty array like `x`, using order='K' + + For an array `x` that was obtained by permutation of a contiguous + array the returned array will have the same shape and the same + strides as `x`. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray, got {type(X)}") + if usm_type is None: + usm_type = X.usm_type + if dev is None: + dev = X.device + fl = X.flags + if fl["C"] or X.size <= 1: + return dpt.empty_like( + X, dtype=dt, usm_type=usm_type, device=dev, order="C" + ) + elif fl["F"]: + return dpt.empty_like( + X, dtype=dt, usm_type=usm_type, device=dev, order="F" + ) + st = list(X.strides) + perm = sorted( + range(X.ndim), key=lambda d: builtins.abs(st[d]), reverse=True + ) + inv_perm = sorted(range(X.ndim), key=lambda i: perm[i]) + st_sorted = [st[i] for i in perm] + sh = X.shape + sh_sorted = tuple(sh[i] for i in perm) + R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") + if min(st_sorted) < 0: + sl = tuple( + slice(None, None, -1) + if st_sorted[i] < 0 + else slice(None, None, None) + for i in range(X.ndim) + ) + R = R[sl] + return dpt.permute_dims(R, inv_perm) + + +def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): + if not isinstance(X1, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray, got {type(X1)}") + if not isinstance(X2, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray, got {type(X2)}") + nd1 = X1.ndim + nd2 = X2.ndim + if nd1 > nd2 and X1.shape == res_shape: + return _empty_like_orderK(X1, dt, usm_type, dev) + elif nd1 < nd2 and X2.shape == res_shape: + return _empty_like_orderK(X2, dt, usm_type, dev) + fl1 = X1.flags + fl2 = X2.flags + if fl1["C"] or fl2["C"]: + return dpt.empty( + res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C" + ) + if fl1["F"] and fl2["F"]: + return dpt.empty( + res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F" + ) + st1 = list(X1.strides) + st2 = list(X2.strides) + max_ndim = max(nd1, nd2) + st1 += [0] * (max_ndim - len(st1)) + st2 += [0] * (max_ndim - len(st2)) + perm = sorted( + range(max_ndim), + key=lambda d: (builtins.abs(st1[d]), builtins.abs(st2[d])), + reverse=True, + ) + inv_perm = sorted(range(max_ndim), key=lambda i: perm[i]) + st1_sorted = [st1[i] for i in perm] + st2_sorted = [st2[i] for i in perm] + sh = res_shape + sh_sorted = tuple(sh[i] for i in perm) + R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") + if max(min(st1_sorted), min(st2_sorted)) < 0: + sl = tuple( + slice(None, None, -1) + if (st1_sorted[i] < 0 and st2_sorted[i] < 0) + else slice(None, None, None) + for i in range(nd1) + ) + R = R[sl] + return dpt.permute_dims(R, inv_perm) + + def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True): """ astype(array, new_dtype, order="K", casting="unsafe", \ copy=True) @@ -432,26 +523,15 @@ def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True): "Unrecognized value of the order keyword. " "Recognized values are 'A', 'C', 'F', or 'K'" ) - R = dpt.usm_ndarray( - usm_ary.shape, - dtype=target_dtype, - buffer=usm_ary.usm_type, - order=copy_order, - buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, - ) - if order == "K" and (not c_contig and not f_contig): - original_strides = usm_ary.strides - ind = sorted( - range(usm_ary.ndim), - key=lambda i: abs(original_strides[i]), - reverse=True, - ) - new_strides = tuple(R.strides[ind[i]] for i in ind) + if order == "K": + R = _empty_like_orderK(usm_ary, target_dtype) + else: R = dpt.usm_ndarray( usm_ary.shape, dtype=target_dtype, - buffer=R.usm_data, - strides=new_strides, + buffer=usm_ary.usm_type, + order=copy_order, + buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, ) _copy_from_usm_ndarray_to_usm_ndarray(R, usm_ary) return R diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 78c79fb2ad..f924ee31cd 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -26,10 +26,9 @@ from dpctl.tensor._usmarray import _is_object_with_buffer_protocol as _is_buffer from dpctl.utils import ExecutionPlacementError +from ._copy_utils import _empty_like_orderK, _empty_like_pair_orderK from ._type_utils import ( _acceptance_fn_default, - _empty_like_orderK, - _empty_like_pair_orderK, _find_buf_dtype, _find_buf_dtype2, _find_inplace_dtype, diff --git a/dpctl/tensor/_type_utils.py b/dpctl/tensor/_type_utils.py index fb2223f292..b576764689 100644 --- a/dpctl/tensor/_type_utils.py +++ b/dpctl/tensor/_type_utils.py @@ -14,8 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import builtins - import dpctl.tensor as dpt import dpctl.tensor._tensor_impl as ti @@ -116,96 +114,6 @@ def _can_cast(from_: dpt.dtype, to_: dpt.dtype, _fp16: bool, _fp64: bool): return can_cast_v -def _empty_like_orderK(X, dt, usm_type=None, dev=None): - """Returns empty array like `x`, using order='K' - - For an array `x` that was obtained by permutation of a contiguous - array the returned array will have the same shape and the same - strides as `x`. - """ - if not isinstance(X, dpt.usm_ndarray): - raise TypeError(f"Expected usm_ndarray, got {type(X)}") - if usm_type is None: - usm_type = X.usm_type - if dev is None: - dev = X.device - fl = X.flags - if fl["C"] or X.size <= 1: - return dpt.empty_like( - X, dtype=dt, usm_type=usm_type, device=dev, order="C" - ) - elif fl["F"]: - return dpt.empty_like( - X, dtype=dt, usm_type=usm_type, device=dev, order="F" - ) - st = list(X.strides) - perm = sorted( - range(X.ndim), key=lambda d: builtins.abs(st[d]), reverse=True - ) - inv_perm = sorted(range(X.ndim), key=lambda i: perm[i]) - st_sorted = [st[i] for i in perm] - sh = X.shape - sh_sorted = tuple(sh[i] for i in perm) - R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") - if min(st_sorted) < 0: - sl = tuple( - slice(None, None, -1) - if st_sorted[i] < 0 - else slice(None, None, None) - for i in range(X.ndim) - ) - R = R[sl] - return dpt.permute_dims(R, inv_perm) - - -def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): - if not isinstance(X1, dpt.usm_ndarray): - raise TypeError(f"Expected usm_ndarray, got {type(X1)}") - if not isinstance(X2, dpt.usm_ndarray): - raise TypeError(f"Expected usm_ndarray, got {type(X2)}") - nd1 = X1.ndim - nd2 = X2.ndim - if nd1 > nd2 and X1.shape == res_shape: - return _empty_like_orderK(X1, dt, usm_type, dev) - elif nd1 < nd2 and X2.shape == res_shape: - return _empty_like_orderK(X2, dt, usm_type, dev) - fl1 = X1.flags - fl2 = X2.flags - if fl1["C"] or fl2["C"]: - return dpt.empty( - res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C" - ) - if fl1["F"] and fl2["F"]: - return dpt.empty( - res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F" - ) - st1 = list(X1.strides) - st2 = list(X2.strides) - max_ndim = max(nd1, nd2) - st1 += [0] * (max_ndim - len(st1)) - st2 += [0] * (max_ndim - len(st2)) - perm = sorted( - range(max_ndim), - key=lambda d: (builtins.abs(st1[d]), builtins.abs(st2[d])), - reverse=True, - ) - inv_perm = sorted(range(max_ndim), key=lambda i: perm[i]) - st1_sorted = [st1[i] for i in perm] - st2_sorted = [st2[i] for i in perm] - sh = res_shape - sh_sorted = tuple(sh[i] for i in perm) - R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") - if max(min(st1_sorted), min(st2_sorted)) < 0: - sl = tuple( - slice(None, None, -1) - if (st1_sorted[i] < 0 and st2_sorted[i] < 0) - else slice(None, None, None) - for i in range(nd1) - ) - R = R[sl] - return dpt.permute_dims(R, inv_perm) - - def _to_device_supported_dtype(dt, dev): has_fp16 = dev.has_aspect_fp16 has_fp64 = dev.has_aspect_fp64 @@ -339,8 +247,6 @@ def _find_inplace_dtype(lhs_dtype, rhs_dtype, query_fn, sycl_dev): "_find_buf_dtype", "_find_buf_dtype2", "_find_inplace_dtype", - "_empty_like_orderK", - "_empty_like_pair_orderK", "_to_device_supported_dtype", "_acceptance_fn_default", "_acceptance_fn_divide", diff --git a/dpctl/tests/elementwise/test_type_utils.py b/dpctl/tests/elementwise/test_type_utils.py index 403e455c2e..2ca9c547d4 100644 --- a/dpctl/tests/elementwise/test_type_utils.py +++ b/dpctl/tests/elementwise/test_type_utils.py @@ -18,6 +18,7 @@ import dpctl import dpctl.tensor as dpt +import dpctl.tensor._copy_utils as cu import dpctl.tensor._type_utils as tu from .utils import _all_dtypes, _map_to_device_dtype @@ -73,15 +74,15 @@ def test_type_utils_empty_like_orderK(): a = dpt.empty((10, 10), dtype=dpt.int32, order="F") except dpctl.SyclDeviceCreationError: pytest.skip("No SYCL devices available") - X = tu._empty_like_orderK(a, dpt.int32, a.usm_type, a.device) + X = cu._empty_like_orderK(a, dpt.int32, a.usm_type, a.device) assert X.flags["F"] def test_type_utils_empty_like_orderK_invalid_args(): with pytest.raises(TypeError): - tu._empty_like_orderK([1, 2, 3], dpt.int32, "device", None) + cu._empty_like_orderK([1, 2, 3], dpt.int32, "device", None) with pytest.raises(TypeError): - tu._empty_like_pair_orderK( + cu._empty_like_pair_orderK( [1, 2, 3], ( 1, @@ -98,7 +99,7 @@ def test_type_utils_empty_like_orderK_invalid_args(): except dpctl.SyclDeviceCreationError: pytest.skip("No SYCL devices available") with pytest.raises(TypeError): - tu._empty_like_pair_orderK( + cu._empty_like_pair_orderK( a, ( 1, From 51e3f15d76aa0721566f26b9a4f6448deb79b4aa Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 6 Aug 2023 20:12:30 -0500 Subject: [PATCH 15/26] Moved 2 tests from test_type_utils to elementwise/test_type_utils The dpctl/tests/test_type_utils.py has been removed. --- dpctl/tests/elementwise/test_type_utils.py | 44 ++++++++++++++ dpctl/tests/test_type_utils.py | 68 ---------------------- 2 files changed, 44 insertions(+), 68 deletions(-) delete mode 100644 dpctl/tests/test_type_utils.py diff --git a/dpctl/tests/elementwise/test_type_utils.py b/dpctl/tests/elementwise/test_type_utils.py index 2ca9c547d4..f6e09b9b4e 100644 --- a/dpctl/tests/elementwise/test_type_utils.py +++ b/dpctl/tests/elementwise/test_type_utils.py @@ -186,3 +186,47 @@ def test_binary_func_arg_validation(): with pytest.raises(ValueError): dpt.add(a, Ellipsis) dpt.add(a, a, order="invalid") + + +def test_all_data_types(): + fp16_fp64_types = set([dpt.float16, dpt.float64, dpt.complex128]) + fp64_types = set([dpt.float64, dpt.complex128]) + + all_dts = tu._all_data_types(True, True) + assert fp16_fp64_types.issubset(all_dts) + + all_dts = tu._all_data_types(True, False) + assert dpt.float16 in all_dts + assert not fp64_types.issubset(all_dts) + + all_dts = tu._all_data_types(False, True) + assert dpt.float16 not in all_dts + assert fp64_types.issubset(all_dts) + + all_dts = tu._all_data_types(False, False) + assert not fp16_fp64_types.issubset(all_dts) + + +@pytest.mark.parametrize("fp16", [True, False]) +@pytest.mark.parametrize("fp64", [True, False]) +def test_maximal_inexact_types(fp16, fp64): + assert not tu._is_maximal_inexact_type(dpt.int32, fp16, fp64) + assert fp64 == tu._is_maximal_inexact_type(dpt.float64, fp16, fp64) + assert fp64 == tu._is_maximal_inexact_type(dpt.complex128, fp16, fp64) + assert fp64 != tu._is_maximal_inexact_type(dpt.float32, fp16, fp64) + assert fp64 != tu._is_maximal_inexact_type(dpt.complex64, fp16, fp64) + + +def test_can_cast_device(): + assert tu._can_cast(dpt.int64, dpt.float64, True, True) + # if f8 is available, can't cast i8 to f4 + assert not tu._can_cast(dpt.int64, dpt.float32, True, True) + assert not tu._can_cast(dpt.int64, dpt.float32, False, True) + # should be able to cast to f8 when f2 unavailable + assert tu._can_cast(dpt.int64, dpt.float64, False, True) + # casting to f4 acceptable when f8 unavailable + assert tu._can_cast(dpt.int64, dpt.float32, True, False) + assert tu._can_cast(dpt.int64, dpt.float32, False, False) + # can't safely cast inexact type to inexact type of lesser precision + assert not tu._can_cast(dpt.float32, dpt.float16, True, False) + assert not tu._can_cast(dpt.float64, dpt.float32, False, True) diff --git a/dpctl/tests/test_type_utils.py b/dpctl/tests/test_type_utils.py deleted file mode 100644 index 882478a2ce..0000000000 --- a/dpctl/tests/test_type_utils.py +++ /dev/null @@ -1,68 +0,0 @@ -# Data Parallel Control (dpctl) -# -# Copyright 2020-2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -import dpctl.tensor as dpt -from dpctl.tensor._type_utils import ( - _all_data_types, - _can_cast, - _is_maximal_inexact_type, -) - - -def test_all_data_types(): - fp16_fp64_types = set([dpt.float16, dpt.float64, dpt.complex128]) - fp64_types = set([dpt.float64, dpt.complex128]) - - all_dts = _all_data_types(True, True) - assert fp16_fp64_types.issubset(all_dts) - - all_dts = _all_data_types(True, False) - assert dpt.float16 in all_dts - assert not fp64_types.issubset(all_dts) - - all_dts = _all_data_types(False, True) - assert dpt.float16 not in all_dts - assert fp64_types.issubset(all_dts) - - all_dts = _all_data_types(False, False) - assert not fp16_fp64_types.issubset(all_dts) - - -@pytest.mark.parametrize("fp16", [True, False]) -@pytest.mark.parametrize("fp64", [True, False]) -def test_maximal_inexact_types(fp16, fp64): - assert not _is_maximal_inexact_type(dpt.int32, fp16, fp64) - assert fp64 == _is_maximal_inexact_type(dpt.float64, fp16, fp64) - assert fp64 == _is_maximal_inexact_type(dpt.complex128, fp16, fp64) - assert fp64 != _is_maximal_inexact_type(dpt.float32, fp16, fp64) - assert fp64 != _is_maximal_inexact_type(dpt.complex64, fp16, fp64) - - -def test_can_cast_device(): - assert _can_cast(dpt.int64, dpt.float64, True, True) - # if f8 is available, can't cast i8 to f4 - assert not _can_cast(dpt.int64, dpt.float32, True, True) - assert not _can_cast(dpt.int64, dpt.float32, False, True) - # should be able to cast to f8 when f2 unavailable - assert _can_cast(dpt.int64, dpt.float64, False, True) - # casting to f4 acceptable when f8 unavailable - assert _can_cast(dpt.int64, dpt.float32, True, False) - assert _can_cast(dpt.int64, dpt.float32, False, False) - # can't safely cast inexact type to inexact type of lesser precision - assert not _can_cast(dpt.float32, dpt.float16, True, False) - assert not _can_cast(dpt.float64, dpt.float32, False, True) From e5785ca51b1518d78b50d3395d8d9efc7fbef35a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 6 Aug 2023 20:14:00 -0500 Subject: [PATCH 16/26] Fixed bug in concat uncovered by array API tests ``` import dpctl.tensor as dpt x1 = dpt.full(tuple(), 77, dtype='u2') x2 = dpt.zeros(2, dtype='uint8')[dpt.newaxis, :] dpt.concat((x1, x2), axis=None) ``` The reason the exception was raised is that _copy_usm_ndarray_for_reshape which is used in the implementation of concat with axis=None requires both source and destination to have the same data-type. --- dpctl/tensor/_manipulation_functions.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py index 7b066417af..4d5de3fb0f 100644 --- a/dpctl/tensor/_manipulation_functions.py +++ b/dpctl/tensor/_manipulation_functions.py @@ -554,8 +554,13 @@ def _concat_axis_None(arrays): sycl_queue=exec_q, ) else: + src_ = array + # _copy_usm_ndarray_for_reshape requires src and dst to have + # the same data type + if not array.dtype == res_dtype: + src_ = dpt.astype(src_, res_dtype) hev, _ = ti._copy_usm_ndarray_for_reshape( - src=array, + src=src_, dst=res[fill_start:fill_end], shift=0, sycl_queue=exec_q, From a3c00bc3ed9af0fc13f0331e526072690fb70575 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 6 Aug 2023 22:11:35 -0500 Subject: [PATCH 17/26] Closes gh-1325 ``` In [1]: import dpctl.tensor as dpt In [2]: a = dpt.arange(10, dtype='int64') ...: b = dpt.arange(10, dtype='float32') In [3]: dpt.concat((a,b)) Out[3]: usm_ndarray([0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32) In [4]: _.sycl_device.name Out[4]: 'Intel(R) Graphics [0x9a49]' ``` --- dpctl/tensor/_manipulation_functions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py index 4d5de3fb0f..26c1ab60cf 100644 --- a/dpctl/tensor/_manipulation_functions.py +++ b/dpctl/tensor/_manipulation_functions.py @@ -25,6 +25,8 @@ import dpctl.tensor._tensor_impl as ti import dpctl.utils as dputils +from ._type_utils import _to_device_supported_dtype + __doc__ = ( "Implementation module for array manipulation " "functions in :module:`dpctl.tensor`" @@ -504,8 +506,10 @@ def _arrays_validation(arrays, check_ndim=True): _supported_dtype(Xi.dtype for Xi in arrays) res_dtype = X0.dtype + dev = exec_q.sycl_device for i in range(1, n): res_dtype = np.promote_types(res_dtype, arrays[i]) + res_dtype = _to_device_supported_dtype(res_dtype, dev) if check_ndim: for i in range(1, n): From 80eae6ee12152ceedf9538a80c3312d205cd200f Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Aug 2023 07:39:07 -0700 Subject: [PATCH 18/26] Corrected order="K" support in copy --- dpctl/tensor/_copy_utils.py | 131 ++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 72 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index 6ad4f6a154..565a11dec7 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -290,78 +290,6 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): _copy_same_shape(dst, src_same_shape) -def copy(usm_ary, order="K"): - """copy(ary, order="K") - - Creates a copy of given instance of :class:`dpctl.tensor.usm_ndarray`. - - Args: - ary (usm_ndarray): - Input array. - order ({"C", "F", "A", "K"}, optional): - Controls the memory layout of the output array. - Returns: - usm_ndarray: - A copy of the input array. - - Memory layout of the copy is controlled by `order` keyword, - following NumPy's conventions. The `order` keywords can be - one of the following: - - - "C": C-contiguous memory layout - - "F": Fortran-contiguous memory layout - - "A": Fortran-contiguous if the input array is also Fortran-contiguous, - otherwise C-contiguous - - "K": match the layout of `usm_ary` as closely as possible. - - """ - if not isinstance(usm_ary, dpt.usm_ndarray): - return TypeError( - f"Expected object of type dpt.usm_ndarray, got {type(usm_ary)}" - ) - copy_order = "C" - if order == "C": - pass - elif order == "F": - copy_order = order - elif order == "A": - if usm_ary.flags.f_contiguous: - copy_order = "F" - elif order == "K": - if usm_ary.flags.f_contiguous: - copy_order = "F" - else: - raise ValueError( - "Unrecognized value of the order keyword. " - "Recognized values are 'A', 'C', 'F', or 'K'" - ) - c_contig = usm_ary.flags.c_contiguous - f_contig = usm_ary.flags.f_contiguous - R = dpt.usm_ndarray( - usm_ary.shape, - dtype=usm_ary.dtype, - buffer=usm_ary.usm_type, - order=copy_order, - buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, - ) - if order == "K" and (not c_contig and not f_contig): - original_strides = usm_ary.strides - ind = sorted( - range(usm_ary.ndim), - key=lambda i: abs(original_strides[i]), - reverse=True, - ) - new_strides = tuple(R.strides[ind[i]] for i in ind) - R = dpt.usm_ndarray( - usm_ary.shape, - dtype=usm_ary.dtype, - buffer=R.usm_data, - strides=new_strides, - ) - _copy_same_shape(R, usm_ary) - return R - - def _empty_like_orderK(X, dt, usm_type=None, dev=None): """Returns empty array like `x`, using order='K' @@ -452,6 +380,65 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): return dpt.permute_dims(R, inv_perm) +def copy(usm_ary, order="K"): + """copy(ary, order="K") + + Creates a copy of given instance of :class:`dpctl.tensor.usm_ndarray`. + + Args: + ary (usm_ndarray): + Input array. + order ({"C", "F", "A", "K"}, optional): + Controls the memory layout of the output array. + Returns: + usm_ndarray: + A copy of the input array. + + Memory layout of the copy is controlled by `order` keyword, + following NumPy's conventions. The `order` keywords can be + one of the following: + + - "C": C-contiguous memory layout + - "F": Fortran-contiguous memory layout + - "A": Fortran-contiguous if the input array is also Fortran-contiguous, + otherwise C-contiguous + - "K": match the layout of `usm_ary` as closely as possible. + + """ + if not isinstance(usm_ary, dpt.usm_ndarray): + return TypeError( + f"Expected object of type dpt.usm_ndarray, got {type(usm_ary)}" + ) + copy_order = "C" + if order == "C": + pass + elif order == "F": + copy_order = order + elif order == "A": + if usm_ary.flags.f_contiguous: + copy_order = "F" + elif order == "K": + if usm_ary.flags.f_contiguous: + copy_order = "F" + else: + raise ValueError( + "Unrecognized value of the order keyword. " + "Recognized values are 'A', 'C', 'F', or 'K'" + ) + if order == "K": + R = _empty_like_orderK(usm_ary, usm_ary.dtype) + else: + R = dpt.usm_ndarray( + usm_ary.shape, + dtype=usm_ary.dtype, + buffer=usm_ary.usm_type, + order=copy_order, + buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, + ) + _copy_same_shape(R, usm_ary) + return R + + def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True): """ astype(array, new_dtype, order="K", casting="unsafe", \ copy=True) From ff1081aeb0c9555ddbd7761ef1175b562dc60272 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Aug 2023 07:39:13 -0700 Subject: [PATCH 19/26] Fixed logaddexp for mixed nan and number operands --- .../elementwise_functions/logaddexp.hpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index 39ba5c3fcf..f2d2505f2c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "utils/offset_utils.hpp" @@ -55,13 +56,14 @@ using dpctl::tensor::type_utils::vec_cast; template struct LogAddExpFunctor { - using supports_sg_loadstore = typename std::negation< - std::disjunction, tu_ns::is_complex>>; - using supports_vec = typename std::negation< - std::disjunction, tu_ns::is_complex>>; + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; resT operator()(const argT1 &in1, const argT2 &in2) { + if (std::isnan(in1) || std::isnan(in2)) { + return std::numeric_limits::quiet_NaN(); + } resT max = std::max(in1, in2); resT min = std::min(in1, in2); return max + std::log1p(std::exp(min - max)); @@ -76,8 +78,13 @@ template struct LogAddExpFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { - resT max = std::max(in1[i], in2[i]); - res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); + if (std::isnan(in1[i]) || std::isnan(in2[i])) { + res[i] = std::numeric_limits::quiet_NaN(); + } + else { + resT max = std::max(in1[i], in2[i]); + res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); + } } return res; From 1b5419f19a0be887743a316170ddb7a101d8ae37 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Aug 2023 10:32:29 -0700 Subject: [PATCH 20/26] logaddexp now handles both NaNs and infinities correctly per array API --- .../kernels/elementwise_functions/logaddexp.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index f2d2505f2c..b5f48dfffb 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -61,10 +61,10 @@ template struct LogAddExpFunctor resT operator()(const argT1 &in1, const argT2 &in2) { - if (std::isnan(in1) || std::isnan(in2)) { - return std::numeric_limits::quiet_NaN(); - } resT max = std::max(in1, in2); + if (std::isnan(max) || std::isinf(max)) { + return max; + } resT min = std::min(in1, in2); return max + std::log1p(std::exp(min - max)); } @@ -78,11 +78,11 @@ template struct LogAddExpFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { - if (std::isnan(in1[i]) || std::isnan(in2[i])) { - res[i] = std::numeric_limits::quiet_NaN(); + resT max = std::max(in1[i], in2[i]); + if (std::isnan(max) || std::isinf(max)) { + res[i] = max; } else { - resT max = std::max(in1[i], in2[i]); res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); } } From 3c874338321f18f5e554074c37ef4d1a30a3539a Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Aug 2023 14:56:23 -0700 Subject: [PATCH 21/26] Broke up 'or' conditional in logaddexp logic for inf and NaN - 'or' conditions can sometimes cause wrong results when using the OS compiler --- .../kernels/elementwise_functions/logaddexp.hpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index b5f48dfffb..91fca7fcb4 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -62,8 +62,13 @@ template struct LogAddExpFunctor resT operator()(const argT1 &in1, const argT2 &in2) { resT max = std::max(in1, in2); - if (std::isnan(max) || std::isinf(max)) { - return max; + if (std::isnan(max)) { + return std::numeric_limits::quiet_NaN(); + } + else { + if (std::isinf(max)) { + return std::numeric_limits::infinity(); + } } resT min = std::min(in1, in2); return max + std::log1p(std::exp(min - max)); @@ -79,8 +84,11 @@ template struct LogAddExpFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { resT max = std::max(in1[i], in2[i]); - if (std::isnan(max) || std::isinf(max)) { - res[i] = max; + if (std::isnan(max)) { + res[i] = std::numeric_limits::quiet_NaN(); + } + else if (std::isinf(max)) { + res[i] = std::numeric_limits::infinity(); } else { res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); From 3c0aeedfd824d6e8a4d665dd70d51e6cc65676d4 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 7 Aug 2023 11:42:10 -0500 Subject: [PATCH 22/26] Modularized logic implementing logaddexp If both arguments are -inf, the result is also -inf. --- .../elementwise_functions/logaddexp.hpp | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index 91fca7fcb4..02375d5313 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -61,17 +61,7 @@ template struct LogAddExpFunctor resT operator()(const argT1 &in1, const argT2 &in2) { - resT max = std::max(in1, in2); - if (std::isnan(max)) { - return std::numeric_limits::quiet_NaN(); - } - else { - if (std::isinf(max)) { - return std::numeric_limits::infinity(); - } - } - resT min = std::min(in1, in2); - return max + std::log1p(std::exp(min - max)); + return impl(in1, in2); } template @@ -83,20 +73,29 @@ template struct LogAddExpFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { - resT max = std::max(in1[i], in2[i]); - if (std::isnan(max)) { - res[i] = std::numeric_limits::quiet_NaN(); - } - else if (std::isinf(max)) { - res[i] = std::numeric_limits::infinity(); - } - else { - res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); - } + res[i] = impl(in1[i], in2[i]); } return res; } + +private: + template T impl(T const &in1, T const &in2) + { + T max = std::max(in1, in2); + if (std::isnan(max)) { + return std::numeric_limits::quiet_NaN(); + } + else { + if (std::isinf(max)) { + // if both args are -inf, and hence max is -inf + // the result is -inf as well + return max; + } + } + T min = std::min(in1, in2); + return max + std::log1p(std::exp(min - max)); + } }; template Date: Mon, 7 Aug 2023 02:13:02 -0500 Subject: [PATCH 23/26] Simplified flags_ computation in to_device method Use `self.flags_` directly, instead of getting it via temporary Flags object. --- dpctl/tensor/_usmarray.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 57dd1e47f0..20a226bd3e 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -872,7 +872,7 @@ cdef class usm_ndarray: strides=self.strides, offset=self.get_offset() ) - res.flags_ = self.flags.flags + res.flags_ = self.flags_ return res else: nbytes = self.usm_data.nbytes @@ -887,7 +887,7 @@ cdef class usm_ndarray: strides=self.strides, offset=self.get_offset() ) - res.flags_ = self.flags.flags + res.flags_ = self.flags_ return res def _set_namespace(self, mod): From cf7d9bf11d3e269d8b43ba128a9c709a58065fa1 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 7 Aug 2023 02:14:35 -0500 Subject: [PATCH 24/26] Change to test_complex_special_cases The change is to suppress RuntimeWarning arising from within assert_allclose utility. --- dpctl/tests/elementwise/test_complex.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/dpctl/tests/elementwise/test_complex.py b/dpctl/tests/elementwise/test_complex.py index 684b405612..85d3c0ad2e 100644 --- a/dpctl/tests/elementwise/test_complex.py +++ b/dpctl/tests/elementwise/test_complex.py @@ -15,6 +15,7 @@ # limitations under the License. import itertools +import warnings import numpy as np import pytest @@ -203,12 +204,18 @@ def test_complex_special_cases(dtype): Xc = dpt.asarray(Xc_np, dtype=dtype, sycl_queue=q) tol = 8 * dpt.finfo(dtype).resolution - assert_allclose( - dpt.asnumpy(dpt.real(Xc)), np.real(Xc_np), atol=tol, rtol=tol - ) - assert_allclose( - dpt.asnumpy(dpt.imag(Xc)), np.imag(Xc_np), atol=tol, rtol=tol - ) - assert_allclose( - dpt.asnumpy(dpt.conj(Xc)), np.conj(Xc_np), atol=tol, rtol=tol - ) + + actual = dpt.real(Xc) + expected = np.real(Xc_np) + assert_allclose(dpt.asnumpy(actual), expected, atol=tol, rtol=tol) + + actual = dpt.imag(Xc) + expected = np.imag(Xc_np) + assert_allclose(dpt.asnumpy(actual), expected, atol=tol, rtol=tol) + + actual = dpt.conj(Xc) + expected = np.conj(Xc_np) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + assert_allclose(dpt.asnumpy(actual), expected, atol=tol, rtol=tol) From 8343edc268e95837ea686f7b7050bffed4f02a08 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 7 Aug 2023 18:29:30 -0500 Subject: [PATCH 25/26] Array-API conformance testing can start as soon as build_linux jobs finished --- .github/workflows/conda-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 74e387f91a..f717f4b309 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -486,7 +486,7 @@ jobs: done array-api-conformity: - needs: test_linux + needs: build_linux runs-on: ${{ matrix.runner }} strategy: From ebd1fafe53a86f647bd78d015290360508cb10d3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 7 Aug 2023 23:21:09 -0500 Subject: [PATCH 26/26] Fixed log-add-exp per review feedback --- .../elementwise_functions/logaddexp.hpp | 37 +++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index 02375d5313..b718a5f991 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -69,11 +69,17 @@ template struct LogAddExpFunctor const sycl::vec &in2) { sycl::vec res; - auto diff = in1 - in2; + auto diff = in1 - in2; // take advantange of faster vec arithmetic #pragma unroll for (int i = 0; i < vec_sz; ++i) { - res[i] = impl(in1[i], in2[i]); + if (std::isfinite(diff[i])) { + res[i] = std::max(in1[i], in2[i]) + + impl_finite(-std::abs(diff[i])); + } + else { + res[i] = impl(in1[i], in2[i]); + } } return res; @@ -82,19 +88,28 @@ template struct LogAddExpFunctor private: template T impl(T const &in1, T const &in2) { - T max = std::max(in1, in2); - if (std::isnan(max)) { - return std::numeric_limits::quiet_NaN(); + if (in1 == in2) { // handle signed infinities + const T log2 = std::log(T(2)); + return in1 + log2; } else { - if (std::isinf(max)) { - // if both args are -inf, and hence max is -inf - // the result is -inf as well - return max; + const T tmp = in1 - in2; + if (tmp > 0) { + return in1 + std::log1p(std::exp(-tmp)); + } + else if (tmp <= 0) { + return in2 + std::log1p(std::exp(tmp)); + } + else { + return std::numeric_limits::quiet_NaN(); } } - T min = std::min(in1, in2); - return max + std::log1p(std::exp(min - max)); + } + + template T impl_finite(T const &in) + { + return (in > 0) ? (in + std::log1p(std::exp(-in))) + : std::log1p(std::exp(in)); } };