diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 74e387f91a..f717f4b309 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -486,7 +486,7 @@ jobs: done array-api-conformity: - needs: test_linux + needs: build_linux runs-on: ${{ matrix.runner }} strategy: diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index 630b80bb55..3f7ba63a55 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -299,6 +299,7 @@ cdef extern from "syclinterface/dpctl_sycl_platform_manager.h": cdef extern from "syclinterface/dpctl_sycl_platform_interface.h": + cdef bool DPCTLPlatform_AreEq(const DPCTLSyclPlatformRef, const DPCTLSyclPlatformRef) cdef DPCTLSyclPlatformRef DPCTLPlatform_Copy(const DPCTLSyclPlatformRef) cdef DPCTLSyclPlatformRef DPCTLPlatform_Create() cdef DPCTLSyclPlatformRef DPCTLPlatform_CreateFromSelector( @@ -308,6 +309,7 @@ cdef extern from "syclinterface/dpctl_sycl_platform_interface.h": cdef const char *DPCTLPlatform_GetName(const DPCTLSyclPlatformRef) cdef const char *DPCTLPlatform_GetVendor(const DPCTLSyclPlatformRef) cdef const char *DPCTLPlatform_GetVersion(const DPCTLSyclPlatformRef) + cdef size_t DPCTLPlatform_Hash(const DPCTLSyclPlatformRef) cdef DPCTLPlatformVectorRef DPCTLPlatform_GetPlatforms() cdef DPCTLSyclContextRef DPCTLPlatform_GetDefaultContext( const DPCTLSyclPlatformRef) diff --git a/dpctl/_sycl_platform.pxd b/dpctl/_sycl_platform.pxd index d8d737d874..2f619203d6 100644 --- a/dpctl/_sycl_platform.pxd +++ b/dpctl/_sycl_platform.pxd @@ -21,6 +21,8 @@ SYCL platform-related helper functions. """ +from libcpp cimport bool + from ._backend cimport DPCTLSyclDeviceSelectorRef, DPCTLSyclPlatformRef @@ -40,6 +42,7 @@ cdef class SyclPlatform(_SyclPlatform): cdef int _init_from_selector(self, DPCTLSyclDeviceSelectorRef DSRef) cdef int _init_from__SyclPlatform(self, _SyclPlatform other) cdef DPCTLSyclPlatformRef get_platform_ref(self) + cdef bool equals(self, SyclPlatform) cpdef list get_platforms() diff --git a/dpctl/_sycl_platform.pyx b/dpctl/_sycl_platform.pyx index 449ad9029f..589c8ae895 100644 --- a/dpctl/_sycl_platform.pyx +++ b/dpctl/_sycl_platform.pyx @@ -21,10 +21,13 @@ """ Implements SyclPlatform Cython extension type. """ +from libcpp cimport bool + from ._backend cimport ( # noqa: E211 DPCTLCString_Delete, DPCTLDeviceSelector_Delete, DPCTLFilterSelector_Create, + DPCTLPlatform_AreEq, DPCTLPlatform_Copy, DPCTLPlatform_Create, DPCTLPlatform_CreateFromSelector, @@ -35,6 +38,7 @@ from ._backend cimport ( # noqa: E211 DPCTLPlatform_GetPlatforms, DPCTLPlatform_GetVendor, DPCTLPlatform_GetVersion, + DPCTLPlatform_Hash, DPCTLPlatformMgr_GetInfo, DPCTLPlatformMgr_PrintInfo, DPCTLPlatformVector_Delete, @@ -274,6 +278,42 @@ cdef class SyclPlatform(_SyclPlatform): else: return SyclContext._create(CRef) + cdef bool equals(self, SyclPlatform other): + """ + Returns true if the :class:`dpctl.SyclPlatform` argument has the + same underlying ``DPCTLSyclPlatformRef`` object as this + :class:`dpctl.SyclPlatform` instance. + + Returns: + :obj:`bool`: ``True`` if the two :class:`dpctl.SyclPlatform` objects + point to the same ``DPCTLSyclPlatformRef`` object, otherwise + ``False``. + """ + return DPCTLPlatform_AreEq(self._platform_ref, other.get_platform_ref()) + + def __eq__(self, other): + """ + Returns True if the :class:`dpctl.SyclPlatform` argument has the + same underlying ``DPCTLSyclPlatformRef`` object as this + :class:`dpctl.SyclPlatform` instance. + + Returns: + :obj:`bool`: ``True`` if the two :class:`dpctl.SyclPlatform` objects + point to the same ``DPCTLSyclPlatformRef`` object, otherwise + ``False``. + """ + if isinstance(other, SyclPlatform): + return self.equals( other) + else: + return False + + def __hash__(self): + """ + Returns a hash value by hashing the underlying ``sycl::platform`` object. + + """ + return DPCTLPlatform_Hash(self._platform_ref) + def lsplatform(verbosity=0): """ diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd index 729adfc3cb..c906ada4d6 100644 --- a/dpctl/_sycl_queue.pxd +++ b/dpctl/_sycl_queue.pxd @@ -29,7 +29,7 @@ from ._sycl_event cimport SyclEvent from .program._program cimport SyclKernel -cdef void default_async_error_handler(int) nogil except * +cdef void default_async_error_handler(int) except * nogil cdef public api class _SyclQueue [ object Py_SyclQueueObject, type Py_SyclQueueType diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 0134998f7a..f1ec439f3a 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -135,6 +135,8 @@ logical_not, logical_or, logical_xor, + maximum, + minimum, multiply, negative, not_equal, @@ -274,6 +276,8 @@ "log1p", "log2", "log10", + "maximum", + "minimum", "multiply", "negative", "not_equal", diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index e759571790..eab1febb89 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import builtins import operator import numpy as np @@ -245,6 +246,23 @@ def _broadcast_shapes(sh1, sh2): ).shape +def _broadcast_strides(X_shape, X_strides, res_ndim): + """ + Broadcasts strides to match the given dimensions; + returns tuple type strides. + """ + out_strides = [0] * res_ndim + X_shape_len = len(X_shape) + str_dim = -X_shape_len + for i in range(X_shape_len): + shape_value = X_shape[i] + if not shape_value == 1: + out_strides[str_dim] = X_strides[i] + str_dim += 1 + + return tuple(out_strides) + + def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): if any( not isinstance(arg, dpt.usm_ndarray) @@ -267,7 +285,7 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): except ValueError as exc: raise ValueError("Shapes of two arrays are not compatible") from exc - if dst.size < src.size: + if dst.size < src.size and dst.size < np.prod(common_shape): raise ValueError("Destination is smaller ") if len(common_shape) > dst.ndim: @@ -278,17 +296,127 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): common_shape = common_shape[ones_count:] if src.ndim < len(common_shape): - new_src_strides = (0,) * (len(common_shape) - src.ndim) + src.strides + new_src_strides = _broadcast_strides( + src.shape, src.strides, len(common_shape) + ) + src_same_shape = dpt.usm_ndarray( + common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides + ) + elif src.ndim == len(common_shape): + new_src_strides = _broadcast_strides( + src.shape, src.strides, len(common_shape) + ) src_same_shape = dpt.usm_ndarray( common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides ) else: - src_same_shape = src - src_same_shape.shape = common_shape + # since broadcasting succeeded, src.ndim is greater because of + # leading sequence of ones, so we trim it + n = len(common_shape) + new_src_strides = _broadcast_strides( + src.shape[-n:], src.strides[-n:], n + ) + src_same_shape = dpt.usm_ndarray( + common_shape, + dtype=src.dtype, + buffer=src.usm_data, + strides=new_src_strides, + offset=src._element_offset, + ) _copy_same_shape(dst, src_same_shape) +def _empty_like_orderK(X, dt, usm_type=None, dev=None): + """Returns empty array like `x`, using order='K' + + For an array `x` that was obtained by permutation of a contiguous + array the returned array will have the same shape and the same + strides as `x`. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray, got {type(X)}") + if usm_type is None: + usm_type = X.usm_type + if dev is None: + dev = X.device + fl = X.flags + if fl["C"] or X.size <= 1: + return dpt.empty_like( + X, dtype=dt, usm_type=usm_type, device=dev, order="C" + ) + elif fl["F"]: + return dpt.empty_like( + X, dtype=dt, usm_type=usm_type, device=dev, order="F" + ) + st = list(X.strides) + perm = sorted( + range(X.ndim), key=lambda d: builtins.abs(st[d]), reverse=True + ) + inv_perm = sorted(range(X.ndim), key=lambda i: perm[i]) + st_sorted = [st[i] for i in perm] + sh = X.shape + sh_sorted = tuple(sh[i] for i in perm) + R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") + if min(st_sorted) < 0: + sl = tuple( + slice(None, None, -1) + if st_sorted[i] < 0 + else slice(None, None, None) + for i in range(X.ndim) + ) + R = R[sl] + return dpt.permute_dims(R, inv_perm) + + +def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): + if not isinstance(X1, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray, got {type(X1)}") + if not isinstance(X2, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray, got {type(X2)}") + nd1 = X1.ndim + nd2 = X2.ndim + if nd1 > nd2 and X1.shape == res_shape: + return _empty_like_orderK(X1, dt, usm_type, dev) + elif nd1 < nd2 and X2.shape == res_shape: + return _empty_like_orderK(X2, dt, usm_type, dev) + fl1 = X1.flags + fl2 = X2.flags + if fl1["C"] or fl2["C"]: + return dpt.empty( + res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C" + ) + if fl1["F"] and fl2["F"]: + return dpt.empty( + res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F" + ) + st1 = list(X1.strides) + st2 = list(X2.strides) + max_ndim = max(nd1, nd2) + st1 += [0] * (max_ndim - len(st1)) + st2 += [0] * (max_ndim - len(st2)) + perm = sorted( + range(max_ndim), + key=lambda d: (builtins.abs(st1[d]), builtins.abs(st2[d])), + reverse=True, + ) + inv_perm = sorted(range(max_ndim), key=lambda i: perm[i]) + st1_sorted = [st1[i] for i in perm] + st2_sorted = [st2[i] for i in perm] + sh = res_shape + sh_sorted = tuple(sh[i] for i in perm) + R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") + if max(min(st1_sorted), min(st2_sorted)) < 0: + sl = tuple( + slice(None, None, -1) + if (st1_sorted[i] < 0 and st2_sorted[i] < 0) + else slice(None, None, None) + for i in range(nd1) + ) + R = R[sl] + return dpt.permute_dims(R, inv_perm) + + def copy(usm_ary, order="K"): """copy(ary, order="K") @@ -334,28 +462,15 @@ def copy(usm_ary, order="K"): "Unrecognized value of the order keyword. " "Recognized values are 'A', 'C', 'F', or 'K'" ) - c_contig = usm_ary.flags.c_contiguous - f_contig = usm_ary.flags.f_contiguous - R = dpt.usm_ndarray( - usm_ary.shape, - dtype=usm_ary.dtype, - buffer=usm_ary.usm_type, - order=copy_order, - buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, - ) - if order == "K" and (not c_contig and not f_contig): - original_strides = usm_ary.strides - ind = sorted( - range(usm_ary.ndim), - key=lambda i: abs(original_strides[i]), - reverse=True, - ) - new_strides = tuple(R.strides[ind[i]] for i in ind) + if order == "K": + R = _empty_like_orderK(usm_ary, usm_ary.dtype) + else: R = dpt.usm_ndarray( usm_ary.shape, dtype=usm_ary.dtype, - buffer=R.usm_data, - strides=new_strides, + buffer=usm_ary.usm_type, + order=copy_order, + buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, ) _copy_same_shape(R, usm_ary) return R @@ -432,26 +547,15 @@ def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True): "Unrecognized value of the order keyword. " "Recognized values are 'A', 'C', 'F', or 'K'" ) - R = dpt.usm_ndarray( - usm_ary.shape, - dtype=target_dtype, - buffer=usm_ary.usm_type, - order=copy_order, - buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, - ) - if order == "K" and (not c_contig and not f_contig): - original_strides = usm_ary.strides - ind = sorted( - range(usm_ary.ndim), - key=lambda i: abs(original_strides[i]), - reverse=True, - ) - new_strides = tuple(R.strides[ind[i]] for i in ind) + if order == "K": + R = _empty_like_orderK(usm_ary, target_dtype) + else: R = dpt.usm_ndarray( usm_ary.shape, dtype=target_dtype, - buffer=R.usm_data, - strides=new_strides, + buffer=usm_ary.usm_type, + order=copy_order, + buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, ) _copy_from_usm_ndarray_to_usm_ndarray(R, usm_ary) return R @@ -492,6 +596,8 @@ def _extract_impl(ary, ary_mask, axis=0): dst = dpt.empty( dst_shape, dtype=ary.dtype, usm_type=ary.usm_type, device=ary.device ) + if dst.size == 0: + return dst hev, _ = ti._extract( src=ary, cumsum=cumsum, @@ -517,7 +623,7 @@ def _nonzero_impl(ary): mask_nelems, dtype=cumsum_dt, sycl_queue=exec_q, order="C" ) mask_count = ti.mask_positions(ary, cumsum, sycl_queue=exec_q) - indexes_dt = ti.default_device_int_type(exec_q.sycl_device) + indexes_dt = ti.default_device_index_type(exec_q.sycl_device) indexes = dpt.empty( (ary.ndim, mask_count), dtype=indexes_dt, diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 78c79fb2ad..f924ee31cd 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -26,10 +26,9 @@ from dpctl.tensor._usmarray import _is_object_with_buffer_protocol as _is_buffer from dpctl.utils import ExecutionPlacementError +from ._copy_utils import _empty_like_orderK, _empty_like_pair_orderK from ._type_utils import ( _acceptance_fn_default, - _empty_like_orderK, - _empty_like_pair_orderK, _find_buf_dtype, _find_buf_dtype2, _find_inplace_dtype, diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 0ebe874df2..fe85a183ba 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -1176,6 +1176,66 @@ _logical_xor_docstring_, ) +# B??: ==== MAXIMUM (x1, x2) +_maximum_docstring_ = """ +maximum(x1, x2, out=None, order='K') + +Compares two input arrays `x1` and `x2` and returns +a new array containing the element-wise maxima. + +Args: + x1 (usm_ndarray): + First input array, expected to have numeric data type. + x2 (usm_ndarray): + Second input array, also expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise maxima. The data type of + the returned array is determined by the Type Promotion Rules. +""" +maximum = BinaryElementwiseFunc( + "maximum", + ti._maximum_result_type, + ti._maximum, + _maximum_docstring_, +) + +# B??: ==== MINIMUM (x1, x2) +_minimum_docstring_ = """ +minimum(x1, x2, out=None, order='K') + +Compares two input arrays `x1` and `x2` and returns +a new array containing the element-wise minima. + +Args: + x1 (usm_ndarray): + First input array, expected to have numeric data type. + x2 (usm_ndarray): + Second input array, also expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise minima. The data type of + the returned array is determined by the Type Promotion Rules. +""" +minimum = BinaryElementwiseFunc( + "minimum", + ti._minimum_result_type, + ti._minimum, + _minimum_docstring_, +) + # B19: ==== MULTIPLY (x1, x2) _multiply_docstring_ = """ multiply(x1, x2, out=None, order='K') @@ -1369,6 +1429,12 @@ First input array, expected to have a real-valued data type. x2 (usm_ndarray): Second input array, also expected to have a real-valued data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". Returns: usm_ndarray: an array containing the element-wise remainders. The data type of diff --git a/dpctl/tensor/_indexing_functions.py b/dpctl/tensor/_indexing_functions.py index d73a2e7952..188f3cd291 100644 --- a/dpctl/tensor/_indexing_functions.py +++ b/dpctl/tensor/_indexing_functions.py @@ -343,5 +343,5 @@ def nonzero(arr): "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}" ) if arr.ndim == 0: - raise ValueError("Array of positive rank is exepcted") + raise ValueError("Array of positive rank is expected") return _nonzero_impl(arr) diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py index 7b066417af..9406e386af 100644 --- a/dpctl/tensor/_manipulation_functions.py +++ b/dpctl/tensor/_manipulation_functions.py @@ -25,6 +25,9 @@ import dpctl.tensor._tensor_impl as ti import dpctl.utils as dputils +from ._copy_utils import _broadcast_strides +from ._type_utils import _to_device_supported_dtype + __doc__ = ( "Implementation module for array manipulation " "functions in :module:`dpctl.tensor`" @@ -118,23 +121,6 @@ def __repr__(self): return self._finfo.__repr__() -def _broadcast_strides(X_shape, X_strides, res_ndim): - """ - Broadcasts strides to match the given dimensions; - returns tuple type strides. - """ - out_strides = [0] * res_ndim - X_shape_len = len(X_shape) - str_dim = -X_shape_len - for i in range(X_shape_len): - shape_value = X_shape[i] - if not shape_value == 1: - out_strides[str_dim] = X_strides[i] - str_dim += 1 - - return tuple(out_strides) - - def _broadcast_shape_impl(shapes): if len(set(shapes)) == 1: return shapes[0] @@ -504,8 +490,10 @@ def _arrays_validation(arrays, check_ndim=True): _supported_dtype(Xi.dtype for Xi in arrays) res_dtype = X0.dtype + dev = exec_q.sycl_device for i in range(1, n): res_dtype = np.promote_types(res_dtype, arrays[i]) + res_dtype = _to_device_supported_dtype(res_dtype, dev) if check_ndim: for i in range(1, n): @@ -554,8 +542,13 @@ def _concat_axis_None(arrays): sycl_queue=exec_q, ) else: + src_ = array + # _copy_usm_ndarray_for_reshape requires src and dst to have + # the same data type + if not array.dtype == res_dtype: + src_ = dpt.astype(src_, res_dtype) hev, _ = ti._copy_usm_ndarray_for_reshape( - src=array, + src=src_, dst=res[fill_start:fill_end], shift=0, sycl_queue=exec_q, diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index 361dd906c3..4f45d57391 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -33,9 +33,13 @@ cdef Py_ssize_t _slice_len( if sl_start == sl_stop: return 0 if sl_step > 0: + if sl_start > sl_stop: + return 0 # 1 + argmax k such htat sl_start + sl_step*k < sl_stop return 1 + ((sl_stop - sl_start - 1) // sl_step) else: + if sl_start < sl_stop: + return 0 return 1 + ((sl_stop - sl_start + 1) // sl_step) @@ -221,6 +225,9 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): k_new = k + ellipses_count new_shape.extend(shape[k:k_new]) new_strides.extend(strides[k:k_new]) + if any(dim == 0 for dim in shape[k:k_new]): + is_empty = True + new_offset = offset k = k_new elif ind_i is None: new_shape.append(1) @@ -236,6 +243,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): new_offset = new_offset + sl_start * strides[k] if sh_i == 0: is_empty = True + new_offset = offset k = k_new elif _is_boolean(ind_i): new_shape.append(1 if ind_i else 0) diff --git a/dpctl/tensor/_stride_utils.pxi b/dpctl/tensor/_stride_utils.pxi index 896c31e65a..bc7349a5e6 100644 --- a/dpctl/tensor/_stride_utils.pxi +++ b/dpctl/tensor/_stride_utils.pxi @@ -64,6 +64,7 @@ cdef int _from_input_shape_strides( cdef int j cdef bint all_incr = 1 cdef bint all_decr = 1 + cdef bint strides_inspected = 0 cdef Py_ssize_t elem_count = 1 cdef Py_ssize_t min_shift = 0 cdef Py_ssize_t max_shift = 0 @@ -165,6 +166,7 @@ cdef int _from_input_shape_strides( while (j < nd and shape_arr[j] == 1): j = j + 1 if j < nd: + strides_inspected = 1 if all_incr: all_incr = ( (strides_arr[i] > 0) and @@ -179,7 +181,18 @@ cdef int _from_input_shape_strides( ) i = j else: + if not strides_inspected: + # all dimensions have size 1 except + # dimension 'i'. Array is both C and F + # contiguous + strides_inspected = 1 + all_incr = (strides_arr[i] == 1) + all_decr = all_incr break + # should only set contig flags on actually obtained + # values, rather than default values + all_incr = all_incr and strides_inspected + all_decr = all_decr and strides_inspected if all_incr and all_decr: contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS) elif all_incr: diff --git a/dpctl/tensor/_type_utils.py b/dpctl/tensor/_type_utils.py index fb2223f292..b576764689 100644 --- a/dpctl/tensor/_type_utils.py +++ b/dpctl/tensor/_type_utils.py @@ -14,8 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import builtins - import dpctl.tensor as dpt import dpctl.tensor._tensor_impl as ti @@ -116,96 +114,6 @@ def _can_cast(from_: dpt.dtype, to_: dpt.dtype, _fp16: bool, _fp64: bool): return can_cast_v -def _empty_like_orderK(X, dt, usm_type=None, dev=None): - """Returns empty array like `x`, using order='K' - - For an array `x` that was obtained by permutation of a contiguous - array the returned array will have the same shape and the same - strides as `x`. - """ - if not isinstance(X, dpt.usm_ndarray): - raise TypeError(f"Expected usm_ndarray, got {type(X)}") - if usm_type is None: - usm_type = X.usm_type - if dev is None: - dev = X.device - fl = X.flags - if fl["C"] or X.size <= 1: - return dpt.empty_like( - X, dtype=dt, usm_type=usm_type, device=dev, order="C" - ) - elif fl["F"]: - return dpt.empty_like( - X, dtype=dt, usm_type=usm_type, device=dev, order="F" - ) - st = list(X.strides) - perm = sorted( - range(X.ndim), key=lambda d: builtins.abs(st[d]), reverse=True - ) - inv_perm = sorted(range(X.ndim), key=lambda i: perm[i]) - st_sorted = [st[i] for i in perm] - sh = X.shape - sh_sorted = tuple(sh[i] for i in perm) - R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") - if min(st_sorted) < 0: - sl = tuple( - slice(None, None, -1) - if st_sorted[i] < 0 - else slice(None, None, None) - for i in range(X.ndim) - ) - R = R[sl] - return dpt.permute_dims(R, inv_perm) - - -def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): - if not isinstance(X1, dpt.usm_ndarray): - raise TypeError(f"Expected usm_ndarray, got {type(X1)}") - if not isinstance(X2, dpt.usm_ndarray): - raise TypeError(f"Expected usm_ndarray, got {type(X2)}") - nd1 = X1.ndim - nd2 = X2.ndim - if nd1 > nd2 and X1.shape == res_shape: - return _empty_like_orderK(X1, dt, usm_type, dev) - elif nd1 < nd2 and X2.shape == res_shape: - return _empty_like_orderK(X2, dt, usm_type, dev) - fl1 = X1.flags - fl2 = X2.flags - if fl1["C"] or fl2["C"]: - return dpt.empty( - res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C" - ) - if fl1["F"] and fl2["F"]: - return dpt.empty( - res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F" - ) - st1 = list(X1.strides) - st2 = list(X2.strides) - max_ndim = max(nd1, nd2) - st1 += [0] * (max_ndim - len(st1)) - st2 += [0] * (max_ndim - len(st2)) - perm = sorted( - range(max_ndim), - key=lambda d: (builtins.abs(st1[d]), builtins.abs(st2[d])), - reverse=True, - ) - inv_perm = sorted(range(max_ndim), key=lambda i: perm[i]) - st1_sorted = [st1[i] for i in perm] - st2_sorted = [st2[i] for i in perm] - sh = res_shape - sh_sorted = tuple(sh[i] for i in perm) - R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") - if max(min(st1_sorted), min(st2_sorted)) < 0: - sl = tuple( - slice(None, None, -1) - if (st1_sorted[i] < 0 and st2_sorted[i] < 0) - else slice(None, None, None) - for i in range(nd1) - ) - R = R[sl] - return dpt.permute_dims(R, inv_perm) - - def _to_device_supported_dtype(dt, dev): has_fp16 = dev.has_aspect_fp16 has_fp64 = dev.has_aspect_fp64 @@ -339,8 +247,6 @@ def _find_inplace_dtype(lhs_dtype, rhs_dtype, query_fn, sycl_dev): "_find_buf_dtype", "_find_buf_dtype2", "_find_inplace_dtype", - "_empty_like_orderK", - "_empty_like_pair_orderK", "_to_device_supported_dtype", "_acceptance_fn_default", "_acceptance_fn_divide", diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 5b1bd5f6a3..98986d84d6 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -764,6 +764,8 @@ cdef class usm_ndarray: ind, (self).shape, ( self).strides, self.get_offset()) cdef usm_ndarray res + cdef int i = 0 + cdef bint matching = 1 if len(_meta) < 5: raise RuntimeError @@ -787,7 +789,20 @@ cdef class usm_ndarray: from ._copy_utils import _extract_impl, _nonzero_impl, _take_multi_index if len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool: - return _extract_impl(res, adv_ind[0], axis=adv_ind_start_p) + key_ = adv_ind[0] + adv_ind_end_p = key_.ndim + adv_ind_start_p + if adv_ind_end_p > res.ndim: + raise IndexError("too many indices for the array") + key_shape = key_.shape + arr_shape = res.shape[adv_ind_start_p:adv_ind_end_p] + for i in range(key_.ndim): + if matching: + if not key_shape[i] == arr_shape[i] and key_shape[i] > 0: + matching = 0 + if not matching: + raise IndexError("boolean index did not match indexed array in dimensions") + res = _extract_impl(res, key_, axis=adv_ind_start_p) + return res if any(ind.dtype == dpt_bool for ind in adv_ind): adv_ind_int = list() @@ -801,7 +816,7 @@ cdef class usm_ndarray: return _take_multi_index(res, adv_ind, adv_ind_start_p) - def to_device(self, target): + def to_device(self, target, stream=None): """ to_device(target_device) Transfers this array to specified target device. @@ -841,6 +856,14 @@ cdef class usm_ndarray: cdef c_dpctl.DPCTLSyclQueueRef QRef = NULL cdef c_dpmem._Memory arr_buf d = Device.create_device(target) + + if (stream is None or type(stream) is not dpctl.SyclQueue or + stream == self.sycl_queue): + pass + else: + ev = self.sycl_queue.submit_barrier() + stream.submit_barrier(dependent_events=[ev]) + if (d.sycl_context == self.sycl_context): arr_buf = self.usm_data QRef = ( d.sycl_queue).get_queue_ref() @@ -857,7 +880,7 @@ cdef class usm_ndarray: strides=self.strides, offset=self.get_offset() ) - res.flags_ = self.flags.flags + res.flags_ = self.flags_ return res else: nbytes = self.usm_data.nbytes @@ -872,7 +895,7 @@ cdef class usm_ndarray: strides=self.strides, offset=self.get_offset() ) - res.flags_ = self.flags.flags + res.flags_ = self.flags_ return res def _set_namespace(self, mod): @@ -884,12 +907,14 @@ cdef class usm_ndarray: Returns array namespace, member functions of which implement data API. """ - return self.array_namespace_ + return self.array_namespace_ if self.array_namespace_ is not None else dpctl.tensor def __bool__(self): if self.size == 1: mem_view = dpmem.as_usm_memory(self) - return mem_view.copy_to_host().view(self.dtype).__bool__() + host_buf = mem_view.copy_to_host() + view = host_buf.view(self.dtype) + return view.__bool__() if self.size == 0: raise ValueError( @@ -898,13 +923,15 @@ cdef class usm_ndarray: raise ValueError( "The truth value of an array with more than one element is " - "ambiguous. Use a.any() or a.all()" + "ambiguous. Use dpctl.tensor.any() or dpctl.tensor.all()" ) def __float__(self): if self.size == 1: mem_view = dpmem.as_usm_memory(self) - return mem_view.copy_to_host().view(self.dtype).__float__() + host_buf = mem_view.copy_to_host() + view = host_buf.view(self.dtype) + return view.__float__() raise ValueError( "only size-1 arrays can be converted to Python scalars" @@ -913,7 +940,9 @@ cdef class usm_ndarray: def __complex__(self): if self.size == 1: mem_view = dpmem.as_usm_memory(self) - return mem_view.copy_to_host().view(self.dtype).__complex__() + host_buf = mem_view.copy_to_host() + view = host_buf.view(self.dtype) + return view.__complex__() raise ValueError( "only size-1 arrays can be converted to Python scalars" @@ -922,7 +951,9 @@ cdef class usm_ndarray: def __int__(self): if self.size == 1: mem_view = dpmem.as_usm_memory(self) - return mem_view.copy_to_host().view(self.dtype).__int__() + host_buf = mem_view.copy_to_host() + view = host_buf.view(self.dtype) + return view.__int__() raise ValueError( "only size-1 arrays can be converted to Python scalars" @@ -957,9 +988,9 @@ cdef class usm_ndarray: def __and__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "logical_and", other) + return _dispatch_binary_elementwise(first, "bitwise_and", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "logical_and", other) + return _dispatch_binary_elementwise2(first, "bitwise_and", other) return NotImplemented def __dlpack__(self, stream=None): @@ -1023,7 +1054,7 @@ cdef class usm_ndarray: return _dispatch_binary_elementwise(self, "greater", other) def __invert__(self): - return _dispatch_unary_elementwise(self, "invert") + return _dispatch_unary_elementwise(self, "bitwise_invert") def __le__(self, other): return _dispatch_binary_elementwise(self, "less_equal", other) @@ -1037,9 +1068,9 @@ cdef class usm_ndarray: def __lshift__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "left_shift", other) + return _dispatch_binary_elementwise(first, "bitwise_left_shift", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "left_shift", other) + return _dispatch_binary_elementwise2(first, "bitwise_left_shift", other) return NotImplemented def __lt__(self, other): @@ -1056,9 +1087,9 @@ cdef class usm_ndarray: def __mod__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "mod", other) + return _dispatch_binary_elementwise(first, "remainder", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "mod", other) + return _dispatch_binary_elementwise2(first, "remainder", other) return NotImplemented def __mul__(first, other): @@ -1078,9 +1109,9 @@ cdef class usm_ndarray: def __or__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "logical_or", other) + return _dispatch_binary_elementwise(first, "bitwise_or", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "logical_or", other) + return _dispatch_binary_elementwise2(first, "bitwise_or", other) return NotImplemented def __pos__(self): @@ -1090,17 +1121,17 @@ cdef class usm_ndarray: "See comment in __add__" if mod is None: if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "power", other) + return _dispatch_binary_elementwise(first, "pow", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise(first, "power", other) + return _dispatch_binary_elementwise(first, "pow", other) return NotImplemented def __rshift__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "right_shift", other) + return _dispatch_binary_elementwise(first, "bitwise_right_shift", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "right_shift", other) + return _dispatch_binary_elementwise2(first, "bitwise_right_shift", other) return NotImplemented def __setitem__(self, key, rhs): @@ -1202,57 +1233,57 @@ cdef class usm_ndarray: def __truediv__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "true_divide", other) + return _dispatch_binary_elementwise(first, "divide", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "true_divide", other) + return _dispatch_binary_elementwise2(first, "divide", other) return NotImplemented def __xor__(first, other): "See comment in __add__" if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "logical_xor", other) + return _dispatch_binary_elementwise(first, "bitwise_xor", other) elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "logical_xor", other) + return _dispatch_binary_elementwise2(first, "bitwise_xor", other) return NotImplemented def __radd__(self, other): return _dispatch_binary_elementwise(self, "add", other) def __rand__(self, other): - return _dispatch_binary_elementwise(self, "logical_and", other) + return _dispatch_binary_elementwise(self, "bitwise_and", other) def __rfloordiv__(self, other): return _dispatch_binary_elementwise2(other, "floor_divide", self) def __rlshift__(self, other): - return _dispatch_binary_elementwise2(other, "left_shift", self) + return _dispatch_binary_elementwise2(other, "bitwise_left_shift", self) def __rmatmul__(self, other): return _dispatch_binary_elementwise2(other, "matmul", self) def __rmod__(self, other): - return _dispatch_binary_elementwise2(other, "mod", self) + return _dispatch_binary_elementwise2(other, "remainder", self) def __rmul__(self, other): return _dispatch_binary_elementwise(self, "multiply", other) def __ror__(self, other): - return _dispatch_binary_elementwise(self, "logical_or", other) + return _dispatch_binary_elementwise(self, "bitwise_or", other) def __rpow__(self, other): - return _dispatch_binary_elementwise2(other, "power", self) + return _dispatch_binary_elementwise2(other, "pow", self) def __rrshift__(self, other): - return _dispatch_binary_elementwise2(other, "right_shift", self) + return _dispatch_binary_elementwise2(other, "bitwise_right_shift", self) def __rsub__(self, other): return _dispatch_binary_elementwise2(other, "subtract", self) def __rtruediv__(self, other): - return _dispatch_binary_elementwise2(other, "true_divide", self) + return _dispatch_binary_elementwise2(other, "divide", self) def __rxor__(self, other): - return _dispatch_binary_elementwise2(other, "logical_xor", self) + return _dispatch_binary_elementwise2(other, "bitwise_xor", self) def __iadd__(self, other): from ._elementwise_funcs import add diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp index 7b59285884..b996a6d0ec 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -73,6 +74,46 @@ template struct Expm1Functor const realT x = std::real(in); const realT y = std::imag(in); + // special cases + if (std::isinf(x)) { + if (x > realT(0)) { + // positive infinity cases + if (!std::isfinite(y)) { + return resT{x, std::numeric_limits::quiet_NaN()}; + } + else if (y == realT(0)) { + return in; + } + else { + return (resT{std::copysign(x, std::cos(y)), + std::copysign(x, std::sin(y))}); + } + } + else { + // negative infinity cases + if (!std::isfinite(y)) { + // copy sign of y to guarantee + // conj(expm1(x)) == expm1(conj(x)) + return resT{realT(-1), std::copysign(realT(0), y)}; + } + else { + return resT{realT(-1), + std::copysign(realT(0), std::sin(y))}; + } + } + } + + if (std::isnan(x)) { + if (y == realT(0)) { + return in; + } + else { + return resT{std::numeric_limits::quiet_NaN(), + std::numeric_limits::quiet_NaN()}; + } + } + + // x, y finite numbers realT cosY_val; const realT sinY_val = sycl::sincos(y, &cosY_val); const realT sinhalfY_val = std::sin(y / 2); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp index d1db67f116..ab2ce9b5c3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp @@ -30,6 +30,7 @@ #include #include +#include "utils/math_utils.hpp" #include "utils/offset_utils.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" @@ -67,12 +68,8 @@ template struct GreaterFunctor tu_ns::is_complex::value) { static_assert(std::is_same_v); - using realT = typename argT1::value_type; - realT real1 = std::real(in1); - realT real2 = std::real(in2); - - return (real1 == real2) ? (std::imag(in1) > std::imag(in2)) - : real1 > real2; + using dpctl::tensor::math_utils::greater_complex; + return greater_complex(in1, in2); } else { return (in1 > in2); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp index 9b6fac1214..fa2ece210f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp @@ -30,6 +30,7 @@ #include #include +#include "utils/math_utils.hpp" #include "utils/offset_utils.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" @@ -68,12 +69,8 @@ struct GreaterEqualFunctor tu_ns::is_complex::value) { static_assert(std::is_same_v); - using realT = typename argT1::value_type; - realT real1 = std::real(in1); - realT real2 = std::real(in2); - - return (real1 == real2) ? (std::imag(in1) >= std::imag(in2)) - : real1 >= real2; + using dpctl::tensor::math_utils::greater_equal_complex; + return greater_equal_complex(in1, in2); } else { return (in1 >= in2); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp index 4829c30f54..36675a2e19 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp @@ -29,6 +29,7 @@ #include #include +#include "utils/math_utils.hpp" #include "utils/offset_utils.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" @@ -66,12 +67,8 @@ template struct LessFunctor tu_ns::is_complex::value) { static_assert(std::is_same_v); - using realT = typename argT1::value_type; - realT real1 = std::real(in1); - realT real2 = std::real(in2); - - return (real1 == real2) ? (std::imag(in1) < std::imag(in2)) - : real1 < real2; + using dpctl::tensor::math_utils::less_complex; + return less_complex(in1, in2); } else { return (in1 < in2); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp index 31f5c0b4ba..9b740219fc 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp @@ -30,6 +30,7 @@ #include #include +#include "utils/math_utils.hpp" #include "utils/offset_utils.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" @@ -67,12 +68,8 @@ template struct LessEqualFunctor tu_ns::is_complex::value) { static_assert(std::is_same_v); - using realT = typename argT1::value_type; - realT real1 = std::real(in1); - realT real2 = std::real(in2); - - return (real1 == real2) ? (std::imag(in1) <= std::imag(in2)) - : real1 <= real2; + using dpctl::tensor::math_utils::less_equal_complex; + return less_equal_complex(in1, in2); } else { return (in1 <= in2); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index 39ba5c3fcf..b718a5f991 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "utils/offset_utils.hpp" @@ -55,16 +56,12 @@ using dpctl::tensor::type_utils::vec_cast; template struct LogAddExpFunctor { - using supports_sg_loadstore = typename std::negation< - std::disjunction, tu_ns::is_complex>>; - using supports_vec = typename std::negation< - std::disjunction, tu_ns::is_complex>>; + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; resT operator()(const argT1 &in1, const argT2 &in2) { - resT max = std::max(in1, in2); - resT min = std::min(in1, in2); - return max + std::log1p(std::exp(min - max)); + return impl(in1, in2); } template @@ -72,16 +69,48 @@ template struct LogAddExpFunctor const sycl::vec &in2) { sycl::vec res; - auto diff = in1 - in2; + auto diff = in1 - in2; // take advantange of faster vec arithmetic #pragma unroll for (int i = 0; i < vec_sz; ++i) { - resT max = std::max(in1[i], in2[i]); - res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); + if (std::isfinite(diff[i])) { + res[i] = std::max(in1[i], in2[i]) + + impl_finite(-std::abs(diff[i])); + } + else { + res[i] = impl(in1[i], in2[i]); + } } return res; } + +private: + template T impl(T const &in1, T const &in2) + { + if (in1 == in2) { // handle signed infinities + const T log2 = std::log(T(2)); + return in1 + log2; + } + else { + const T tmp = in1 - in2; + if (tmp > 0) { + return in1 + std::log1p(std::exp(-tmp)); + } + else if (tmp <= 0) { + return in2 + std::log1p(std::exp(tmp)); + } + else { + return std::numeric_limits::quiet_NaN(); + } + } + } + + template T impl_finite(T const &in) + { + return (in > 0) ? (in + std::log1p(std::exp(-in))) + : std::log1p(std::exp(in)); + } }; template +#include +#include +#include + +#include "utils/math_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace maximum +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template struct MaximumFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) + { + if constexpr (tu_ns::is_complex::value || + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + using dpctl::tensor::math_utils::max_complex; + return max_complex(in1, in2); + } + else if constexpr (std::is_floating_point_v || + std::is_same_v) + return (std::isnan(in1) || in1 > in2) ? in1 : in2; + else + return (in1 > in2) ? in1 : in2; + } + + template + sycl::vec operator()(const sycl::vec &in1, + const sycl::vec &in2) + { + sycl::vec res; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if constexpr (std::is_floating_point_v) + res[i] = + (sycl::isnan(in1[i]) || in1[i] > in2[i]) ? in1[i] : in2[i]; + else + res[i] = (in1[i] > in2[i]) ? in1[i] : in2[i]; + } + return res; + } +}; + +template +using MaximumContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs>; + +template +using MaximumStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + MaximumFunctor>; + +template struct MaximumOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class maximum_contig_kernel; + +template +sycl::event maximum_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_impl< + argTy1, argTy2, MaximumOutputType, MaximumContigFunctor, + maximum_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends); +} + +template struct MaximumContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename MaximumOutputType::value_type, void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = maximum_contig_impl; + return fn; + } + } +}; + +template struct MaximumTypeMapFactory +{ + /*! @brief get typeid for output type of maximum(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename MaximumOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class maximum_strided_kernel; + +template +sycl::event +maximum_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, MaximumOutputType, MaximumStridedFunctor, + maximum_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct MaximumStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename MaximumOutputType::value_type, void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = maximum_strided_impl; + return fn; + } + } +}; + +} // namespace maximum +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp new file mode 100644 index 0000000000..d19c829a6c --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp @@ -0,0 +1,279 @@ +//=== minimum.hpp - Binary function MINIMUM ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of MINIMUM(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include "utils/math_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace minimum +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template struct MinimumFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) + { + if constexpr (tu_ns::is_complex::value || + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + using dpctl::tensor::math_utils::min_complex; + return min_complex(in1, in2); + } + else if constexpr (std::is_floating_point_v || + std::is_same_v) + return (std::isnan(in1) || in1 < in2) ? in1 : in2; + else + return (in1 < in2) ? in1 : in2; + } + + template + sycl::vec operator()(const sycl::vec &in1, + const sycl::vec &in2) + { + sycl::vec res; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if constexpr (std::is_floating_point_v) + res[i] = + (sycl::isnan(in1[i]) || in1[i] < in2[i]) ? in1[i] : in2[i]; + else + res[i] = (in1[i] < in2[i]) ? in1[i] : in2[i]; + } + return res; + } +}; + +template +using MinimumContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs>; + +template +using MinimumStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + MinimumFunctor>; + +template struct MinimumOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class minimum_contig_kernel; + +template +sycl::event minimum_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_impl< + argTy1, argTy2, MinimumOutputType, MinimumContigFunctor, + minimum_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends); +} + +template struct MinimumContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename MinimumOutputType::value_type, void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = minimum_contig_impl; + return fn; + } + } +}; + +template struct MinimumTypeMapFactory +{ + /*! @brief get typeid for output type of minimum(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename MinimumOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class minimum_strided_kernel; + +template +sycl::event +minimum_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, MinimumOutputType, MinimumStridedFunctor, + minimum_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct MinimumStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename MinimumOutputType::value_type, void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = minimum_strided_impl; + return fn; + } + } +}; + +} // namespace minimum +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/utils/math_utils.hpp b/dpctl/tensor/libtensor/include/utils/math_utils.hpp new file mode 100644 index 0000000000..d724e03e35 --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/math_utils.hpp @@ -0,0 +1,120 @@ +//===------- math_utils.hpp - Implementation of math utils -------*-C++-*/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines math utility functions. +//===----------------------------------------------------------------------===// + +#pragma once +#include +#include + +namespace dpctl +{ +namespace tensor +{ +namespace math_utils +{ + +template bool less_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + return (real1 == real2) + ? (imag1 < imag2) + : (real1 < real2 && !std::isnan(imag1) && !std::isnan(imag2)); +} + +template bool greater_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + return (real1 == real2) + ? (imag1 > imag2) + : (real1 > real2 && !std::isnan(imag1) && !std::isnan(imag2)); +} + +template bool less_equal_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + return (real1 == real2) + ? (imag1 <= imag2) + : (real1 < real2 && !std::isnan(imag1) && !std::isnan(imag2)); +} + +template bool greater_equal_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + return (real1 == real2) + ? (imag1 >= imag2) + : (real1 > real2 && !std::isnan(imag1) && !std::isnan(imag2)); +} + +template T max_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + bool isnan_imag1 = std::isnan(imag1); + bool gt = (real1 == real2) + ? (imag1 > imag2) + : (real1 > real2 && !isnan_imag1 && !std::isnan(imag2)); + return (std::isnan(real1) || isnan_imag1 || gt) ? x1 : x2; +} + +template T min_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + bool isnan_imag1 = std::isnan(imag1); + bool lt = (real1 == real2) + ? (imag1 < imag2) + : (real1 < real2 && !isnan_imag1 && !std::isnan(imag2)); + return (std::isnan(real1) || isnan_imag1 || lt) ? x1 : x2; +} + +} // namespace math_utils +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/device_support_queries.cpp b/dpctl/tensor/libtensor/source/device_support_queries.cpp index 16ae43ba97..946c36ad26 100644 --- a/dpctl/tensor/libtensor/source/device_support_queries.cpp +++ b/dpctl/tensor/libtensor/source/device_support_queries.cpp @@ -71,6 +71,11 @@ std::string _default_device_bool_type(sycl::device) return "b1"; } +std::string _default_device_index_type(sycl::device) +{ + return "i8"; +} + sycl::device _extract_device(py::object arg) { auto const &api = dpctl::detail::dpctl_capi::get(); @@ -115,6 +120,12 @@ std::string default_device_complex_type(py::object arg) return _default_device_complex_type(d); } +std::string default_device_index_type(py::object arg) +{ + sycl::device d = _extract_device(arg); + return _default_device_index_type(d); +} + } // namespace py_internal } // namespace tensor } // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/device_support_queries.hpp b/dpctl/tensor/libtensor/source/device_support_queries.hpp index a54835fc75..6626b3502a 100644 --- a/dpctl/tensor/libtensor/source/device_support_queries.hpp +++ b/dpctl/tensor/libtensor/source/device_support_queries.hpp @@ -41,6 +41,7 @@ extern std::string default_device_fp_type(py::object); extern std::string default_device_int_type(py::object); extern std::string default_device_bool_type(py::object); extern std::string default_device_complex_type(py::object); +extern std::string default_device_index_type(py::object); } // namespace py_internal } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 22eb85eea9..cc95cecb38 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -74,6 +74,8 @@ #include "kernels/elementwise_functions/logical_not.hpp" #include "kernels/elementwise_functions/logical_or.hpp" #include "kernels/elementwise_functions/logical_xor.hpp" +#include "kernels/elementwise_functions/maximum.hpp" +#include "kernels/elementwise_functions/minimum.hpp" #include "kernels/elementwise_functions/multiply.hpp" #include "kernels/elementwise_functions/negative.hpp" #include "kernels/elementwise_functions/not_equal.hpp" @@ -1794,6 +1796,86 @@ void populate_logical_xor_dispatch_tables(void) }; } // namespace impl +// B??: ==== MAXIMUM (x1, x2) +namespace impl +{ + +namespace maximum_fn_ns = dpctl::tensor::kernels::maximum; + +static binary_contig_impl_fn_ptr_t + maximum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int maximum_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + maximum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_maximum_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = maximum_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MaximumTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(maximum_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MaximumStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(maximum_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MaximumContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(maximum_contig_dispatch_table); +}; + +} // namespace impl + +// B??: ==== MINIMUM (x1, x2) +namespace impl +{ + +namespace minimum_fn_ns = dpctl::tensor::kernels::minimum; + +static binary_contig_impl_fn_ptr_t + minimum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int minimum_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + minimum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_minimum_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = minimum_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MinimumTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(minimum_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MinimumStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(minimum_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MinimumContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(minimum_contig_dispatch_table); +}; + +} // namespace impl + // B19: ==== MULTIPLY (x1, x2) namespace impl { @@ -3965,6 +4047,86 @@ void init_elementwise_functions(py::module_ m) m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, ""); } + // B??: ==== MAXIMUM (x1, x2) + { + impl::populate_maximum_dispatch_tables(); + using impl::maximum_contig_dispatch_table; + using impl::maximum_output_id_table; + using impl::maximum_strided_dispatch_table; + + auto maximum_pyapi = [&](dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst, + sycl::queue exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, maximum_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + maximum_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + maximum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto maximum_result_type_pyapi = [&](py::dtype dtype1, + py::dtype dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + maximum_output_id_table); + }; + m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_maximum_result_type", maximum_result_type_pyapi, ""); + } + + // B??: ==== MINIMUM (x1, x2) + { + impl::populate_minimum_dispatch_tables(); + using impl::minimum_contig_dispatch_table; + using impl::minimum_output_id_table; + using impl::minimum_strided_dispatch_table; + + auto minimum_pyapi = [&](dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst, + sycl::queue exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, minimum_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + minimum_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + minimum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto minimum_result_type_pyapi = [&](py::dtype dtype1, + py::dtype dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + minimum_output_id_table); + }; + m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_minimum_result_type", minimum_result_type_pyapi, ""); + } + // B19: ==== MULTIPLY (x1, x2) { impl::populate_multiply_dispatch_tables(); diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 1833c2d770..9b4ba6cdad 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -297,9 +297,13 @@ PYBIND11_MODULE(_tensor_impl, m) m.def("default_device_complex_type", dpctl::tensor::py_internal::default_device_complex_type, - "Gives default complex floating point type support by device.", + "Gives default complex floating point type supported by device.", py::arg("dev")); + m.def("default_device_index_type", + dpctl::tensor::py_internal::default_device_index_type, + "Gives default index type supported by device.", py::arg("dev")); + auto tril_fn = [](dpctl::tensor::usm_ndarray src, dpctl::tensor::usm_ndarray dst, py::ssize_t k, sycl::queue exec_q, diff --git a/dpctl/tests/elementwise/test_complex.py b/dpctl/tests/elementwise/test_complex.py index 684b405612..85d3c0ad2e 100644 --- a/dpctl/tests/elementwise/test_complex.py +++ b/dpctl/tests/elementwise/test_complex.py @@ -15,6 +15,7 @@ # limitations under the License. import itertools +import warnings import numpy as np import pytest @@ -203,12 +204,18 @@ def test_complex_special_cases(dtype): Xc = dpt.asarray(Xc_np, dtype=dtype, sycl_queue=q) tol = 8 * dpt.finfo(dtype).resolution - assert_allclose( - dpt.asnumpy(dpt.real(Xc)), np.real(Xc_np), atol=tol, rtol=tol - ) - assert_allclose( - dpt.asnumpy(dpt.imag(Xc)), np.imag(Xc_np), atol=tol, rtol=tol - ) - assert_allclose( - dpt.asnumpy(dpt.conj(Xc)), np.conj(Xc_np), atol=tol, rtol=tol - ) + + actual = dpt.real(Xc) + expected = np.real(Xc_np) + assert_allclose(dpt.asnumpy(actual), expected, atol=tol, rtol=tol) + + actual = dpt.imag(Xc) + expected = np.imag(Xc_np) + assert_allclose(dpt.asnumpy(actual), expected, atol=tol, rtol=tol) + + actual = dpt.conj(Xc) + expected = np.conj(Xc_np) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + assert_allclose(dpt.asnumpy(actual), expected, atol=tol, rtol=tol) diff --git a/dpctl/tests/elementwise/test_expm1.py b/dpctl/tests/elementwise/test_expm1.py index 20dc421c77..ba95d2b96d 100644 --- a/dpctl/tests/elementwise/test_expm1.py +++ b/dpctl/tests/elementwise/test_expm1.py @@ -116,29 +116,53 @@ def test_expm1_order(dtype): def test_expm1_special_cases(): - q = get_queue_or_skip() + get_queue_or_skip() - X = dpt.asarray( - [dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q - ) - Xnp = dpt.asnumpy(X) + X = dpt.asarray([dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + res = np.asarray([np.nan, 0.0, -0.0, np.inf, -1.0], dtype="f4") tol = dpt.finfo(X.dtype).resolution - assert_allclose( - dpt.asnumpy(dpt.expm1(X)), np.expm1(Xnp), atol=tol, rtol=tol - ) + assert_allclose(dpt.asnumpy(dpt.expm1(X)), res, atol=tol, rtol=tol) # special cases for complex variant + num_finite = 1.0 vals = [ - complex(*val) - for val in itertools.permutations( - [dpt.nan, dpt.inf, -dpt.inf, 0.0, -0.0, 1.0], 2 - ) + complex(0.0, 0.0), + complex(num_finite, dpt.inf), + complex(num_finite, dpt.nan), + complex(dpt.inf, 0.0), + complex(-dpt.inf, num_finite), + complex(dpt.inf, num_finite), + complex(-dpt.inf, dpt.inf), + complex(dpt.inf, dpt.inf), + complex(-dpt.inf, dpt.nan), + complex(dpt.inf, dpt.nan), + complex(dpt.nan, 0.0), + complex(dpt.nan, num_finite), + complex(dpt.nan, dpt.nan), ] X = dpt.asarray(vals, dtype=dpt.complex64) - Xnp = dpt.asnumpy(X) + cis_1 = complex(np.cos(num_finite), np.sin(num_finite)) + c_nan = complex(np.nan, np.nan) + res = np.asarray( + [ + complex(0.0, 0.0), + c_nan, + c_nan, + complex(np.inf, 0.0), + 0.0 * cis_1 - 1.0, + np.inf * cis_1 - 1.0, + complex(-1.0, 0.0), + complex(np.inf, np.nan), + complex(-1.0, 0.0), + complex(np.inf, np.nan), + complex(np.nan, 0.0), + c_nan, + c_nan, + ], + dtype=np.complex64, + ) tol = dpt.finfo(X.dtype).resolution - assert_allclose( - dpt.asnumpy(dpt.expm1(X)), np.expm1(Xnp), atol=tol, rtol=tol - ) + with np.errstate(invalid="ignore"): + assert_allclose(dpt.asnumpy(dpt.expm1(X)), res, atol=tol, rtol=tol) diff --git a/dpctl/tests/elementwise/test_greater.py b/dpctl/tests/elementwise/test_greater.py index fbda074e53..97915411ea 100644 --- a/dpctl/tests/elementwise/test_greater.py +++ b/dpctl/tests/elementwise/test_greater.py @@ -8,7 +8,7 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -# Ungreater required by applicable law or agreed to in writing, software +# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and diff --git a/dpctl/tests/elementwise/test_greater_equal.py b/dpctl/tests/elementwise/test_greater_equal.py index 3f56e5d460..d188a852ea 100644 --- a/dpctl/tests/elementwise/test_greater_equal.py +++ b/dpctl/tests/elementwise/test_greater_equal.py @@ -8,7 +8,7 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -# Ungreater_equal required by applicable law or agreed to in writing, software +# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and diff --git a/dpctl/tests/elementwise/test_less_equal.py b/dpctl/tests/elementwise/test_less_equal.py index b539d6a48f..1949b9d91f 100644 --- a/dpctl/tests/elementwise/test_less_equal.py +++ b/dpctl/tests/elementwise/test_less_equal.py @@ -8,7 +8,7 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -# Unless_equal required by applicable law or agreed to in writing, software +# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and diff --git a/dpctl/tests/elementwise/test_log1p.py b/dpctl/tests/elementwise/test_log1p.py index 2820b4f8b5..d40574c415 100644 --- a/dpctl/tests/elementwise/test_log1p.py +++ b/dpctl/tests/elementwise/test_log1p.py @@ -119,28 +119,49 @@ def test_log1p_special_cases(): q = get_queue_or_skip() X = dpt.asarray( - [dpt.nan, -1.0, -2.0, 0.0, -0.0, dpt.inf, -dpt.inf], + [dpt.nan, -2.0, -1.0, -0.0, 0.0, dpt.inf], dtype="f4", sycl_queue=q, ) - Xnp = dpt.asnumpy(X) + res = np.asarray([np.nan, np.nan, -np.inf, -0.0, 0.0, np.inf]) tol = dpt.finfo(X.dtype).resolution - assert_allclose( - dpt.asnumpy(dpt.log1p(X)), np.log1p(Xnp), atol=tol, rtol=tol - ) + with np.errstate(divide="ignore", invalid="ignore"): + assert_allclose(dpt.asnumpy(dpt.log1p(X)), res, atol=tol, rtol=tol) # special cases for complex vals = [ - complex(*val) - for val in itertools.permutations( - [dpt.nan, dpt.inf, -dpt.inf, 0.0, -0.0, 1.0, -1.0, -2.0], 2 - ) + complex(-1.0, 0.0), + complex(2.0, dpt.inf), + complex(2.0, dpt.nan), + complex(-dpt.inf, 1.0), + complex(dpt.inf, 1.0), + complex(-dpt.inf, dpt.inf), + complex(dpt.inf, dpt.inf), + complex(dpt.inf, dpt.nan), + complex(dpt.nan, 1.0), + complex(dpt.nan, dpt.inf), + complex(dpt.nan, dpt.nan), ] X = dpt.asarray(vals, dtype=dpt.complex64) - Xnp = dpt.asnumpy(X) + c_nan = complex(np.nan, np.nan) + res = np.asarray( + [ + complex(-np.inf, 0.0), + complex(np.inf, np.pi / 2), + c_nan, + complex(np.inf, np.pi), + complex(np.inf, 0.0), + complex(np.inf, 3 * np.pi / 4), + complex(np.inf, np.pi / 4), + complex(np.inf, np.nan), + c_nan, + complex(np.inf, np.nan), + c_nan, + ], + dtype=np.complex64, + ) tol = dpt.finfo(X.dtype).resolution - assert_allclose( - dpt.asnumpy(dpt.log1p(X)), np.log1p(Xnp), atol=tol, rtol=tol - ) + with np.errstate(invalid="ignore"): + assert_allclose(dpt.asnumpy(dpt.log1p(X)), res, atol=tol, rtol=tol) diff --git a/dpctl/tests/elementwise/test_logical_and.py b/dpctl/tests/elementwise/test_logical_and.py index 12a35b06d6..c897bde876 100644 --- a/dpctl/tests/elementwise/test_logical_and.py +++ b/dpctl/tests/elementwise/test_logical_and.py @@ -8,7 +8,7 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -# Unless_equal required by applicable law or agreed to in writing, software +# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and diff --git a/dpctl/tests/elementwise/test_logical_not.py b/dpctl/tests/elementwise/test_logical_not.py index aec9bf31b4..80ebaebb89 100644 --- a/dpctl/tests/elementwise/test_logical_not.py +++ b/dpctl/tests/elementwise/test_logical_not.py @@ -8,7 +8,7 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -# Unless_equal required by applicable law or agreed to in writing, software +# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and diff --git a/dpctl/tests/elementwise/test_logical_or.py b/dpctl/tests/elementwise/test_logical_or.py index f99f6758f5..9bc46cb042 100644 --- a/dpctl/tests/elementwise/test_logical_or.py +++ b/dpctl/tests/elementwise/test_logical_or.py @@ -8,7 +8,7 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -# Unless_equal required by applicable law or agreed to in writing, software +# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and diff --git a/dpctl/tests/elementwise/test_logical_xor.py b/dpctl/tests/elementwise/test_logical_xor.py index 9c34e8bbb5..aef9bc2f42 100644 --- a/dpctl/tests/elementwise/test_logical_xor.py +++ b/dpctl/tests/elementwise/test_logical_xor.py @@ -8,7 +8,7 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -# Unless_equal required by applicable law or agreed to in writing, software +# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and diff --git a/dpctl/tests/elementwise/test_maximum_minimum.py b/dpctl/tests/elementwise/test_maximum_minimum.py new file mode 100644 index 0000000000..e8b845d20d --- /dev/null +++ b/dpctl/tests/elementwise/test_maximum_minimum.py @@ -0,0 +1,314 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ctypes +import itertools + +import numpy as np +import pytest +from numpy.testing import assert_array_equal + +import dpctl +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _compare_dtypes, _usm_types + + +@pytest.mark.parametrize("op1_dtype", _all_dtypes) +@pytest.mark.parametrize("op2_dtype", _all_dtypes) +def test_maximum_minimum_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1_np = np.arange(sz) + np.random.shuffle(ar1_np) + ar1 = dpt.asarray(ar1_np, dtype=op1_dtype) + ar2_np = np.arange(sz) + np.random.shuffle(ar2_np) + ar2 = dpt.asarray(ar2_np, dtype=op2_dtype) + + r = dpt.maximum(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected = np.maximum(ar1_np.astype(op1_dtype), ar2_np.astype(op2_dtype)) + + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar1.shape + assert (dpt.asnumpy(r) == expected).all() + assert r.sycl_queue == ar1.sycl_queue + + r = dpt.minimum(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected = np.minimum(ar1_np.astype(op1_dtype), ar2_np.astype(op2_dtype)) + + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar1.shape + assert (dpt.asnumpy(r) == expected).all() + assert r.sycl_queue == ar1.sycl_queue + + ar3_np = np.arange(sz) + np.random.shuffle(ar3_np) + ar3 = dpt.asarray(ar3_np, dtype=op1_dtype) + ar4_np = np.arange(2 * sz) + np.random.shuffle(ar4_np) + ar4 = dpt.asarray(ar4_np, dtype=op2_dtype) + + r = dpt.maximum(ar3[::-1], ar4[::2]) + assert isinstance(r, dpt.usm_ndarray) + expected = np.maximum( + ar3_np[::-1].astype(op1_dtype), ar4_np[::2].astype(op2_dtype) + ) + + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar3.shape + assert (dpt.asnumpy(r) == expected).all() + + r = dpt.minimum(ar3[::-1], ar4[::2]) + assert isinstance(r, dpt.usm_ndarray) + expected = np.minimum( + ar3_np[::-1].astype(op1_dtype), ar4_np[::2].astype(op2_dtype) + ) + + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar3.shape + assert (dpt.asnumpy(r) == expected).all() + + +@pytest.mark.parametrize("op_dtype", ["c8", "c16"]) +def test_maximum_minimum_complex_matrix(op_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op_dtype, q) + + sz = 127 + ar1_np_real = np.random.randint(0, 10, sz) + ar1_np_imag = np.random.randint(0, 10, sz) + ar1 = dpt.asarray(ar1_np_real + 1j * ar1_np_imag, dtype=op_dtype) + + ar2_np_real = np.random.randint(0, 10, sz) + ar2_np_imag = np.random.randint(0, 10, sz) + ar2 = dpt.asarray(ar2_np_real + 1j * ar2_np_imag, dtype=op_dtype) + + r = dpt.maximum(ar1, ar2) + expected = np.maximum(dpt.asnumpy(ar1), dpt.asnumpy(ar2)) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == expected.shape + assert_array_equal(dpt.asnumpy(r), expected) + + r1 = dpt.maximum(ar1[::-2], ar2[::2]) + expected1 = np.maximum(dpt.asnumpy(ar1[::-2]), dpt.asnumpy(ar2[::2])) + assert _compare_dtypes(r.dtype, expected1.dtype, sycl_queue=q) + assert r1.shape == expected1.shape + assert_array_equal(dpt.asnumpy(r1), expected1) + + r = dpt.minimum(ar1, ar2) + expected = np.minimum(dpt.asnumpy(ar1), dpt.asnumpy(ar2)) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == expected.shape + assert_array_equal(dpt.asnumpy(r), expected) + + r1 = dpt.minimum(ar1[::-2], ar2[::2]) + expected1 = np.minimum(dpt.asnumpy(ar1[::-2]), dpt.asnumpy(ar2[::2])) + assert _compare_dtypes(r.dtype, expected1.dtype, sycl_queue=q) + assert r1.shape == expected1.shape + assert_array_equal(dpt.asnumpy(r1), expected1) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"]) +def test_maximum_minimum_real_special_cases(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = [np.nan, np.inf, -np.inf, 5.0, -3.0] + x = list(itertools.product(x, repeat=2)) + Xnp = np.array([tup[0] for tup in x], dtype=dtype) + Ynp = np.array([tup[1] for tup in x], dtype=dtype) + X = dpt.asarray(Xnp, dtype=dtype) + Y = dpt.asarray(Ynp, dtype=dtype) + + R = dpt.maximum(X, Y) + Rnp = np.maximum(Xnp, Ynp) + assert_array_equal(dpt.asnumpy(R), Rnp) + + R = dpt.minimum(X, Y) + Rnp = np.minimum(Xnp, Ynp) + assert_array_equal(dpt.asnumpy(R), Rnp) + + +@pytest.mark.parametrize("dtype", ["c8", "c16"]) +def test_maximum_minimum_complex_special_cases(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = [np.nan, -np.inf, -np.inf, +2.0, -1.0] + x = [complex(*val) for val in itertools.product(x, repeat=2)] + x = list(itertools.product(x, repeat=2)) + + Xnp = np.array([tup[0] for tup in x], dtype=dtype) + Ynp = np.array([tup[1] for tup in x], dtype=dtype) + X = dpt.asarray(Xnp, dtype=dtype, sycl_queue=q) + Y = dpt.asarray(Ynp, dtype=dtype, sycl_queue=q) + + R = dpt.maximum(X, Y) + Rnp = np.maximum(Xnp, Ynp) + assert_array_equal(dpt.asnumpy(dpt.real(R)), np.real(Rnp)) + assert_array_equal(dpt.asnumpy(dpt.imag(R)), np.imag(Rnp)) + + R = dpt.minimum(X, Y) + Rnp = np.minimum(Xnp, Ynp) + assert_array_equal(dpt.asnumpy(dpt.real(R)), np.real(Rnp)) + assert_array_equal(dpt.asnumpy(dpt.imag(R)), np.imag(Rnp)) + + +@pytest.mark.parametrize("op1_usm_type", _usm_types) +@pytest.mark.parametrize("op2_usm_type", _usm_types) +def test_maximum_minimum_usm_type_matrix(op1_usm_type, op2_usm_type): + get_queue_or_skip() + + sz = 128 + ar1_np = np.arange(sz, dtype="i4") + np.random.shuffle(ar1_np) + ar1 = dpt.asarray(ar1_np, usm_type=op1_usm_type) + ar2_np = np.arange(sz, dtype="i4") + np.random.shuffle(ar2_np) + ar2 = dpt.asarray(ar2_np, usm_type=op2_usm_type) + + r = dpt.maximum(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected_usm_type = dpctl.utils.get_coerced_usm_type( + (op1_usm_type, op2_usm_type) + ) + assert r.usm_type == expected_usm_type + + r = dpt.minimum(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected_usm_type = dpctl.utils.get_coerced_usm_type( + (op1_usm_type, op2_usm_type) + ) + assert r.usm_type == expected_usm_type + + +def test_maximum_minimum_order(): + get_queue_or_skip() + + ar1_np = np.arange(20 * 20, dtype="i4").reshape(20, 20) + np.random.shuffle(ar1_np) + ar1 = dpt.asarray(ar1_np, order="C") + ar2_np = np.arange(20 * 20, dtype="i4").reshape(20, 20) + np.random.shuffle(ar2_np) + ar2 = dpt.asarray(ar2_np, order="C") + + r1 = dpt.maximum(ar1, ar2, order="C") + assert r1.flags.c_contiguous + r2 = dpt.maximum(ar1, ar2, order="F") + assert r2.flags.f_contiguous + r3 = dpt.maximum(ar1, ar2, order="A") + assert r3.flags.c_contiguous + r4 = dpt.maximum(ar1, ar2, order="K") + assert r4.flags.c_contiguous + + ar1 = dpt.asarray(ar1_np, order="F") + ar2 = dpt.asarray(ar2_np, order="F") + r1 = dpt.maximum(ar1, ar2, order="C") + assert r1.flags.c_contiguous + r2 = dpt.maximum(ar1, ar2, order="F") + assert r2.flags.f_contiguous + r3 = dpt.maximum(ar1, ar2, order="A") + assert r3.flags.f_contiguous + r4 = dpt.maximum(ar1, ar2, order="K") + assert r4.flags.f_contiguous + + ar1_np = np.arange(40 * 40, dtype="i4").reshape(40, 40) + np.random.shuffle(ar1_np) + ar1 = dpt.asarray(ar1_np, order="C")[:20, ::-2] + ar2_np = np.arange(40 * 40, dtype="i4").reshape(40, 40) + np.random.shuffle(ar2_np) + ar2 = dpt.asarray(ar2_np, order="C")[:20, ::-2] + r4 = dpt.maximum(ar1, ar2, order="K") + assert r4.strides == (20, -1) + + ar1 = dpt.asarray(ar1_np, order="C")[:20, ::-2].mT + ar2 = dpt.asarray(ar2_np, order="C")[:20, ::-2].mT + r4 = dpt.maximum(ar1, ar2, order="K") + assert r4.strides == (-1, 20) + + +@pytest.mark.parametrize("arr_dt", _all_dtypes) +def test_maximum_minimum_python_scalar(arr_dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(arr_dt, q) + + X = dpt.zeros((10, 10), dtype=arr_dt, sycl_queue=q) + py_ones = ( + bool(1), + int(1), + float(1), + complex(1), + np.float32(1), + ctypes.c_int(1), + ) + for sc in py_ones: + R = dpt.maximum(X, sc) + assert isinstance(R, dpt.usm_ndarray) + R = dpt.maximum(sc, X) + assert isinstance(R, dpt.usm_ndarray) + + R = dpt.minimum(X, sc) + assert isinstance(R, dpt.usm_ndarray) + R = dpt.minimum(sc, X) + assert isinstance(R, dpt.usm_ndarray) + + +class MockArray: + def __init__(self, arr): + self.data_ = arr + + @property + def __sycl_usm_array_interface__(self): + return self.data_.__sycl_usm_array_interface__ + + +def test_maximum_minimum_mock_array(): + get_queue_or_skip() + a = dpt.arange(10) + b = dpt.ones(10) + c = MockArray(b) + r = dpt.maximum(a, c) + assert isinstance(r, dpt.usm_ndarray) + + r = dpt.minimum(a, c) + assert isinstance(r, dpt.usm_ndarray) + + +def test_maximum_canary_mock_array(): + get_queue_or_skip() + a = dpt.arange(10) + + class Canary: + def __init__(self): + pass + + @property + def __sycl_usm_array_interface__(self): + return None + + c = Canary() + with pytest.raises(ValueError): + dpt.maximum(a, c) + + with pytest.raises(ValueError): + dpt.minimum(a, c) diff --git a/dpctl/tests/elementwise/test_round.py b/dpctl/tests/elementwise/test_round.py index fb2b104bb1..6ca4feaf22 100644 --- a/dpctl/tests/elementwise/test_round.py +++ b/dpctl/tests/elementwise/test_round.py @@ -1,3 +1,19 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import itertools import numpy as np diff --git a/dpctl/tests/elementwise/test_type_utils.py b/dpctl/tests/elementwise/test_type_utils.py index 403e455c2e..f6e09b9b4e 100644 --- a/dpctl/tests/elementwise/test_type_utils.py +++ b/dpctl/tests/elementwise/test_type_utils.py @@ -18,6 +18,7 @@ import dpctl import dpctl.tensor as dpt +import dpctl.tensor._copy_utils as cu import dpctl.tensor._type_utils as tu from .utils import _all_dtypes, _map_to_device_dtype @@ -73,15 +74,15 @@ def test_type_utils_empty_like_orderK(): a = dpt.empty((10, 10), dtype=dpt.int32, order="F") except dpctl.SyclDeviceCreationError: pytest.skip("No SYCL devices available") - X = tu._empty_like_orderK(a, dpt.int32, a.usm_type, a.device) + X = cu._empty_like_orderK(a, dpt.int32, a.usm_type, a.device) assert X.flags["F"] def test_type_utils_empty_like_orderK_invalid_args(): with pytest.raises(TypeError): - tu._empty_like_orderK([1, 2, 3], dpt.int32, "device", None) + cu._empty_like_orderK([1, 2, 3], dpt.int32, "device", None) with pytest.raises(TypeError): - tu._empty_like_pair_orderK( + cu._empty_like_pair_orderK( [1, 2, 3], ( 1, @@ -98,7 +99,7 @@ def test_type_utils_empty_like_orderK_invalid_args(): except dpctl.SyclDeviceCreationError: pytest.skip("No SYCL devices available") with pytest.raises(TypeError): - tu._empty_like_pair_orderK( + cu._empty_like_pair_orderK( a, ( 1, @@ -185,3 +186,47 @@ def test_binary_func_arg_validation(): with pytest.raises(ValueError): dpt.add(a, Ellipsis) dpt.add(a, a, order="invalid") + + +def test_all_data_types(): + fp16_fp64_types = set([dpt.float16, dpt.float64, dpt.complex128]) + fp64_types = set([dpt.float64, dpt.complex128]) + + all_dts = tu._all_data_types(True, True) + assert fp16_fp64_types.issubset(all_dts) + + all_dts = tu._all_data_types(True, False) + assert dpt.float16 in all_dts + assert not fp64_types.issubset(all_dts) + + all_dts = tu._all_data_types(False, True) + assert dpt.float16 not in all_dts + assert fp64_types.issubset(all_dts) + + all_dts = tu._all_data_types(False, False) + assert not fp16_fp64_types.issubset(all_dts) + + +@pytest.mark.parametrize("fp16", [True, False]) +@pytest.mark.parametrize("fp64", [True, False]) +def test_maximal_inexact_types(fp16, fp64): + assert not tu._is_maximal_inexact_type(dpt.int32, fp16, fp64) + assert fp64 == tu._is_maximal_inexact_type(dpt.float64, fp16, fp64) + assert fp64 == tu._is_maximal_inexact_type(dpt.complex128, fp16, fp64) + assert fp64 != tu._is_maximal_inexact_type(dpt.float32, fp16, fp64) + assert fp64 != tu._is_maximal_inexact_type(dpt.complex64, fp16, fp64) + + +def test_can_cast_device(): + assert tu._can_cast(dpt.int64, dpt.float64, True, True) + # if f8 is available, can't cast i8 to f4 + assert not tu._can_cast(dpt.int64, dpt.float32, True, True) + assert not tu._can_cast(dpt.int64, dpt.float32, False, True) + # should be able to cast to f8 when f2 unavailable + assert tu._can_cast(dpt.int64, dpt.float64, False, True) + # casting to f4 acceptable when f8 unavailable + assert tu._can_cast(dpt.int64, dpt.float32, True, False) + assert tu._can_cast(dpt.int64, dpt.float32, False, False) + # can't safely cast inexact type to inexact type of lesser precision + assert not tu._can_cast(dpt.float32, dpt.float16, True, False) + assert not tu._can_cast(dpt.float64, dpt.float32, False, True) diff --git a/dpctl/tests/test_sycl_platform.py b/dpctl/tests/test_sycl_platform.py index 39331f9cdc..fa93dbbd12 100644 --- a/dpctl/tests/test_sycl_platform.py +++ b/dpctl/tests/test_sycl_platform.py @@ -17,6 +17,8 @@ """Defines unit test cases for the SyclPlatform class. """ +import sys + import pytest from helper import has_sycl_platforms @@ -88,10 +90,27 @@ def check_repr(platform): def check_default_context(platform): + if "linux" not in sys.platform: + return r = platform.default_context assert type(r) is dpctl.SyclContext +def check_equal_and_hash(platform): + assert platform == platform + if "linux" not in sys.platform: + return + default_ctx = platform.default_context + for d in default_ctx.get_devices(): + assert platform == d.sycl_platform + assert hash(platform) == hash(d.sycl_platform) + + +def check_hash_in_dict(platform): + map = {platform: 0} + assert map[platform] == 0 + + list_of_checks = [ check_name, check_vendor, @@ -99,6 +118,9 @@ def check_default_context(platform): check_backend, check_print_info, check_repr, + check_default_context, + check_equal_and_hash, + check_hash_in_dict, ] diff --git a/dpctl/tests/test_type_utils.py b/dpctl/tests/test_type_utils.py deleted file mode 100644 index 882478a2ce..0000000000 --- a/dpctl/tests/test_type_utils.py +++ /dev/null @@ -1,68 +0,0 @@ -# Data Parallel Control (dpctl) -# -# Copyright 2020-2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -import dpctl.tensor as dpt -from dpctl.tensor._type_utils import ( - _all_data_types, - _can_cast, - _is_maximal_inexact_type, -) - - -def test_all_data_types(): - fp16_fp64_types = set([dpt.float16, dpt.float64, dpt.complex128]) - fp64_types = set([dpt.float64, dpt.complex128]) - - all_dts = _all_data_types(True, True) - assert fp16_fp64_types.issubset(all_dts) - - all_dts = _all_data_types(True, False) - assert dpt.float16 in all_dts - assert not fp64_types.issubset(all_dts) - - all_dts = _all_data_types(False, True) - assert dpt.float16 not in all_dts - assert fp64_types.issubset(all_dts) - - all_dts = _all_data_types(False, False) - assert not fp16_fp64_types.issubset(all_dts) - - -@pytest.mark.parametrize("fp16", [True, False]) -@pytest.mark.parametrize("fp64", [True, False]) -def test_maximal_inexact_types(fp16, fp64): - assert not _is_maximal_inexact_type(dpt.int32, fp16, fp64) - assert fp64 == _is_maximal_inexact_type(dpt.float64, fp16, fp64) - assert fp64 == _is_maximal_inexact_type(dpt.complex128, fp16, fp64) - assert fp64 != _is_maximal_inexact_type(dpt.float32, fp16, fp64) - assert fp64 != _is_maximal_inexact_type(dpt.complex64, fp16, fp64) - - -def test_can_cast_device(): - assert _can_cast(dpt.int64, dpt.float64, True, True) - # if f8 is available, can't cast i8 to f4 - assert not _can_cast(dpt.int64, dpt.float32, True, True) - assert not _can_cast(dpt.int64, dpt.float32, False, True) - # should be able to cast to f8 when f2 unavailable - assert _can_cast(dpt.int64, dpt.float64, False, True) - # casting to f4 acceptable when f8 unavailable - assert _can_cast(dpt.int64, dpt.float32, True, False) - assert _can_cast(dpt.int64, dpt.float32, False, False) - # can't safely cast inexact type to inexact type of lesser precision - assert not _can_cast(dpt.float32, dpt.float16, True, False) - assert not _can_cast(dpt.float64, dpt.float32, False, True) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index e285dcb35f..4ab28db479 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -84,7 +84,7 @@ def test_usm_ndarray_flags(): assert f.forc assert f.fnc - f = dpt.usm_ndarray((5, 1, 1), dtype="i4", strides=(1, 0, 1)).flags + f = dpt.usm_ndarray((5, 0, 1), dtype="i4", strides=(1, 0, 1)).flags assert f.fc assert f.forc assert not dpt.usm_ndarray( @@ -109,6 +109,25 @@ def test_usm_ndarray_flags(): x.flags["C"] = False +def test_usm_ndarray_flags_bug_gh_1334(): + get_queue_or_skip() + a = dpt.ones((2, 3), dtype="u4") + r = dpt.reshape(a, (1, 6, 1)) + assert r.flags["C"] and r.flags["F"] + + a = dpt.ones((2, 3), dtype="u4", order="F") + r = dpt.reshape(a, (1, 6, 1), order="F") + assert r.flags["C"] and r.flags["F"] + + a = dpt.ones((2, 3, 4), dtype="i8") + r = dpt.sum(a, axis=(1, 2), keepdims=True) + assert r.flags["C"] and r.flags["F"] + + a = dpt.ones((2, 1), dtype="?") + r = a[:, 1::-1] + assert r.flags["F"] and r.flags["C"] + + @pytest.mark.parametrize( "dtype", [ @@ -1012,6 +1031,53 @@ def test_setitem_same_dtype(dtype, src_usm_type, dst_usm_type): Zusm_empty[Ellipsis] = Zusm_3d[0, 0, 0:0] +def test_setitem_broadcasting(): + get_queue_or_skip() + dst = dpt.ones((2, 3, 4), dtype="u4") + src = dpt.zeros((3, 1), dtype=dst.dtype) + dst[...] = src + expected = np.zeros(dst.shape, dtype=dst.dtype) + assert np.array_equal(dpt.asnumpy(dst), expected) + + +def test_setitem_broadcasting_empty_dst_validation(): + "Broadcasting rules apply, except exception" + get_queue_or_skip() + dst = dpt.ones((2, 0, 5, 4), dtype="i8") + src = dpt.ones((2, 0, 3, 4), dtype="i8") + with pytest.raises(ValueError): + dst[...] = src + + +def test_setitem_broadcasting_empty_dst_edge_case(): + """RHS is shunken to empty array by + broadasting rule, hence no exception""" + get_queue_or_skip() + dst = dpt.ones(1, dtype="i8")[0:0] + src = dpt.ones(tuple(), dtype="i8") + dst[...] = src + + +def test_setitem_broadcasting_src_ndim_equal_dst_ndim(): + get_queue_or_skip() + dst = dpt.ones((2, 3, 4), dtype="i4") + src = dpt.zeros((2, 1, 4), dtype="i4") + dst[...] = src + + expected = np.zeros(dst.shape, dtype=dst.dtype) + assert np.array_equal(dpt.asnumpy(dst), expected) + + +def test_setitem_broadcasting_src_ndim_greater_than_dst_ndim(): + get_queue_or_skip() + dst = dpt.ones((2, 3, 4), dtype="i4") + src = dpt.zeros((1, 2, 1, 4), dtype="i4") + dst[...] = src + + expected = np.zeros(dst.shape, dtype=dst.dtype) + assert np.array_equal(dpt.asnumpy(dst), expected) + + @pytest.mark.parametrize( "dtype", _all_dtypes, diff --git a/dpctl/tests/test_usm_ndarray_indexing.py b/dpctl/tests/test_usm_ndarray_indexing.py index 87d89a1b8d..9d166226e7 100644 --- a/dpctl/tests/test_usm_ndarray_indexing.py +++ b/dpctl/tests/test_usm_ndarray_indexing.py @@ -22,6 +22,7 @@ import dpctl import dpctl.tensor as dpt +import dpctl.tensor._tensor_impl as ti from dpctl.utils import ExecutionPlacementError _all_dtypes = [ @@ -1353,7 +1354,7 @@ def test_nonzero_dtype(): x = dpt.ones((3, 4)) idx, idy = dpt.nonzero(x) # create array using device's - # default integral data type - ref = dpt.arange(8) - assert idx.dtype == ref.dtype - assert idy.dtype == ref.dtype + # default index data type + index_dt = dpt.dtype(ti.default_device_index_type(x.sycl_queue)) + assert idx.dtype == index_dt + assert idy.dtype == index_dt diff --git a/dpctl/tests/test_usm_ndarray_operators.py b/dpctl/tests/test_usm_ndarray_operators.py index 92dc38bd47..def22562c0 100644 --- a/dpctl/tests/test_usm_ndarray_operators.py +++ b/dpctl/tests/test_usm_ndarray_operators.py @@ -47,7 +47,7 @@ def multiply(a, b): return b -@pytest.mark.parametrize("namespace", [None, Dummy()]) +@pytest.mark.parametrize("namespace", [dpt, Dummy()]) def test_fp_ops(namespace): try: X = dpt.ones(1) @@ -81,7 +81,7 @@ def test_fp_ops(namespace): X.__ifloordiv__(1.0) -@pytest.mark.parametrize("namespace", [None, Dummy()]) +@pytest.mark.parametrize("namespace", [dpt, Dummy()]) def test_int_ops(namespace): try: X = dpt.usm_ndarray(1, "i4") @@ -113,7 +113,7 @@ def test_int_ops(namespace): X.__ipow__(2) -@pytest.mark.parametrize("namespace", [None, Dummy()]) +@pytest.mark.parametrize("namespace", [dpt, Dummy()]) def test_mat_ops(namespace): try: M = dpt.eye(3, 3) diff --git a/libsyclinterface/include/dpctl_sycl_context_interface.h b/libsyclinterface/include/dpctl_sycl_context_interface.h index 6ca3c92d05..ddf80e1e67 100644 --- a/libsyclinterface/include/dpctl_sycl_context_interface.h +++ b/libsyclinterface/include/dpctl_sycl_context_interface.h @@ -159,6 +159,6 @@ void DPCTLContext_Delete(__dpctl_take DPCTLSyclContextRef CtxRef); * @ingroup ContextInterface */ DPCTL_API -size_t DPCTLContext_Hash(__dpctl_take DPCTLSyclContextRef CtxRef); +size_t DPCTLContext_Hash(__dpctl_keep DPCTLSyclContextRef CtxRef); DPCTL_C_EXTERN_C_END diff --git a/libsyclinterface/include/dpctl_sycl_platform_interface.h b/libsyclinterface/include/dpctl_sycl_platform_interface.h index 3e40453e4a..90dab58f1c 100644 --- a/libsyclinterface/include/dpctl_sycl_platform_interface.h +++ b/libsyclinterface/include/dpctl_sycl_platform_interface.h @@ -39,6 +39,19 @@ DPCTL_C_EXTERN_C_BEGIN * @defgroup PlatformInterface Platform class C wrapper */ +/*! + * @brief Checks if two DPCTLSyclPlatformRef objects point to the same + * sycl::platform. + * + * @param PRef1 First opaque pointer to a ``sycl::platform``. + * @param PRef2 Second opaque pointer to a ``sycl::platform``. + * @return True if the underlying sycl::platform are same, false otherwise. + * @ingroup PlatformInterface + */ +DPCTL_API +bool DPCTLPlatform_AreEq(__dpctl_keep const DPCTLSyclPlatformRef PRef1, + __dpctl_keep const DPCTLSyclPlatformRef PRef2); + /*! * @brief Returns a copy of the DPCTLSyclPlatformRef object. * @@ -155,4 +168,14 @@ DPCTL_API __dpctl_give DPCTLSyclContextRef DPCTLPlatform_GetDefaultContext(__dpctl_keep const DPCTLSyclPlatformRef PRef); +/*! + * @brief Wrapper over std::hash's operator() + * + * @param PRef The DPCTLSyclPlatformRef pointer. + * @return Hash value of the underlying ``sycl::platform`` instance. + * @ingroup PlatformInterface + */ +DPCTL_API +size_t DPCTLPlatform_Hash(__dpctl_keep DPCTLSyclPlatformRef CtxRef); + DPCTL_C_EXTERN_C_END diff --git a/libsyclinterface/source/dpctl_sycl_platform_interface.cpp b/libsyclinterface/source/dpctl_sycl_platform_interface.cpp index 2083259611..5be98b7b61 100644 --- a/libsyclinterface/source/dpctl_sycl_platform_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_platform_interface.cpp @@ -234,3 +234,27 @@ DPCTLPlatform_GetDefaultContext(__dpctl_keep const DPCTLSyclPlatformRef PRef) return nullptr; } } + +bool DPCTLPlatform_AreEq(__dpctl_keep const DPCTLSyclPlatformRef PRef1, + __dpctl_keep const DPCTLSyclPlatformRef PRef2) +{ + auto P1 = unwrap(PRef1); + auto P2 = unwrap(PRef2); + if (P1 && P2) + return *P1 == *P2; + else + return false; +} + +size_t DPCTLPlatform_Hash(__dpctl_keep const DPCTLSyclPlatformRef PRef) +{ + if (PRef) { + auto P = unwrap(PRef); + std::hash hash_fn; + return hash_fn(*P); + } + else { + error_handler("Argument PRef is null.", __FILE__, __func__, __LINE__); + return 0; + } +} diff --git a/libsyclinterface/tests/test_sycl_platform_interface.cpp b/libsyclinterface/tests/test_sycl_platform_interface.cpp index e6fb7df134..f04cead0e1 100644 --- a/libsyclinterface/tests/test_sycl_platform_interface.cpp +++ b/libsyclinterface/tests/test_sycl_platform_interface.cpp @@ -264,6 +264,25 @@ TEST_P(TestDPCTLSyclPlatformInterface, ChkPrintInfoNullArg) EXPECT_NO_FATAL_FAILURE(DPCTLPlatformMgr_PrintInfo(Null_PRef, 0)); } +TEST_P(TestDPCTLSyclPlatformInterface, ChkAreEq) +{ + DPCTLSyclPlatformRef PRef_Copy = nullptr; + + EXPECT_NO_FATAL_FAILURE(PRef_Copy = DPCTLPlatform_Copy(PRef)); + + ASSERT_TRUE(DPCTLPlatform_AreEq(PRef, PRef_Copy)); + EXPECT_TRUE(DPCTLPlatform_Hash(PRef) == DPCTLPlatform_Hash(PRef_Copy)); + + EXPECT_NO_FATAL_FAILURE(DPCTLPlatform_Delete(PRef_Copy)); +} + +TEST_P(TestDPCTLSyclPlatformInterface, ChkAreEqNullArg) +{ + DPCTLSyclPlatformRef Null_PRef = nullptr; + ASSERT_FALSE(DPCTLPlatform_AreEq(PRef, Null_PRef)); + ASSERT_TRUE(DPCTLPlatform_Hash(Null_PRef) == 0); +} + TEST_F(TestDPCTLSyclDefaultPlatform, ChkGetName) { check_platform_name(PRef);