From 80eae6ee12152ceedf9538a80c3312d205cd200f Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Aug 2023 07:39:07 -0700 Subject: [PATCH 1/5] Corrected order="K" support in copy --- dpctl/tensor/_copy_utils.py | 131 ++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 72 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index 6ad4f6a154..565a11dec7 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -290,78 +290,6 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): _copy_same_shape(dst, src_same_shape) -def copy(usm_ary, order="K"): - """copy(ary, order="K") - - Creates a copy of given instance of :class:`dpctl.tensor.usm_ndarray`. - - Args: - ary (usm_ndarray): - Input array. - order ({"C", "F", "A", "K"}, optional): - Controls the memory layout of the output array. - Returns: - usm_ndarray: - A copy of the input array. - - Memory layout of the copy is controlled by `order` keyword, - following NumPy's conventions. The `order` keywords can be - one of the following: - - - "C": C-contiguous memory layout - - "F": Fortran-contiguous memory layout - - "A": Fortran-contiguous if the input array is also Fortran-contiguous, - otherwise C-contiguous - - "K": match the layout of `usm_ary` as closely as possible. - - """ - if not isinstance(usm_ary, dpt.usm_ndarray): - return TypeError( - f"Expected object of type dpt.usm_ndarray, got {type(usm_ary)}" - ) - copy_order = "C" - if order == "C": - pass - elif order == "F": - copy_order = order - elif order == "A": - if usm_ary.flags.f_contiguous: - copy_order = "F" - elif order == "K": - if usm_ary.flags.f_contiguous: - copy_order = "F" - else: - raise ValueError( - "Unrecognized value of the order keyword. " - "Recognized values are 'A', 'C', 'F', or 'K'" - ) - c_contig = usm_ary.flags.c_contiguous - f_contig = usm_ary.flags.f_contiguous - R = dpt.usm_ndarray( - usm_ary.shape, - dtype=usm_ary.dtype, - buffer=usm_ary.usm_type, - order=copy_order, - buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, - ) - if order == "K" and (not c_contig and not f_contig): - original_strides = usm_ary.strides - ind = sorted( - range(usm_ary.ndim), - key=lambda i: abs(original_strides[i]), - reverse=True, - ) - new_strides = tuple(R.strides[ind[i]] for i in ind) - R = dpt.usm_ndarray( - usm_ary.shape, - dtype=usm_ary.dtype, - buffer=R.usm_data, - strides=new_strides, - ) - _copy_same_shape(R, usm_ary) - return R - - def _empty_like_orderK(X, dt, usm_type=None, dev=None): """Returns empty array like `x`, using order='K' @@ -452,6 +380,65 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): return dpt.permute_dims(R, inv_perm) +def copy(usm_ary, order="K"): + """copy(ary, order="K") + + Creates a copy of given instance of :class:`dpctl.tensor.usm_ndarray`. + + Args: + ary (usm_ndarray): + Input array. + order ({"C", "F", "A", "K"}, optional): + Controls the memory layout of the output array. + Returns: + usm_ndarray: + A copy of the input array. + + Memory layout of the copy is controlled by `order` keyword, + following NumPy's conventions. The `order` keywords can be + one of the following: + + - "C": C-contiguous memory layout + - "F": Fortran-contiguous memory layout + - "A": Fortran-contiguous if the input array is also Fortran-contiguous, + otherwise C-contiguous + - "K": match the layout of `usm_ary` as closely as possible. + + """ + if not isinstance(usm_ary, dpt.usm_ndarray): + return TypeError( + f"Expected object of type dpt.usm_ndarray, got {type(usm_ary)}" + ) + copy_order = "C" + if order == "C": + pass + elif order == "F": + copy_order = order + elif order == "A": + if usm_ary.flags.f_contiguous: + copy_order = "F" + elif order == "K": + if usm_ary.flags.f_contiguous: + copy_order = "F" + else: + raise ValueError( + "Unrecognized value of the order keyword. " + "Recognized values are 'A', 'C', 'F', or 'K'" + ) + if order == "K": + R = _empty_like_orderK(usm_ary, usm_ary.dtype) + else: + R = dpt.usm_ndarray( + usm_ary.shape, + dtype=usm_ary.dtype, + buffer=usm_ary.usm_type, + order=copy_order, + buffer_ctor_kwargs={"queue": usm_ary.sycl_queue}, + ) + _copy_same_shape(R, usm_ary) + return R + + def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True): """ astype(array, new_dtype, order="K", casting="unsafe", \ copy=True) From ff1081aeb0c9555ddbd7761ef1175b562dc60272 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Aug 2023 07:39:13 -0700 Subject: [PATCH 2/5] Fixed logaddexp for mixed nan and number operands --- .../elementwise_functions/logaddexp.hpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index 39ba5c3fcf..f2d2505f2c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "utils/offset_utils.hpp" @@ -55,13 +56,14 @@ using dpctl::tensor::type_utils::vec_cast; template struct LogAddExpFunctor { - using supports_sg_loadstore = typename std::negation< - std::disjunction, tu_ns::is_complex>>; - using supports_vec = typename std::negation< - std::disjunction, tu_ns::is_complex>>; + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; resT operator()(const argT1 &in1, const argT2 &in2) { + if (std::isnan(in1) || std::isnan(in2)) { + return std::numeric_limits::quiet_NaN(); + } resT max = std::max(in1, in2); resT min = std::min(in1, in2); return max + std::log1p(std::exp(min - max)); @@ -76,8 +78,13 @@ template struct LogAddExpFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { - resT max = std::max(in1[i], in2[i]); - res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); + if (std::isnan(in1[i]) || std::isnan(in2[i])) { + res[i] = std::numeric_limits::quiet_NaN(); + } + else { + resT max = std::max(in1[i], in2[i]); + res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); + } } return res; From 1b5419f19a0be887743a316170ddb7a101d8ae37 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Aug 2023 10:32:29 -0700 Subject: [PATCH 3/5] logaddexp now handles both NaNs and infinities correctly per array API --- .../kernels/elementwise_functions/logaddexp.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index f2d2505f2c..b5f48dfffb 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -61,10 +61,10 @@ template struct LogAddExpFunctor resT operator()(const argT1 &in1, const argT2 &in2) { - if (std::isnan(in1) || std::isnan(in2)) { - return std::numeric_limits::quiet_NaN(); - } resT max = std::max(in1, in2); + if (std::isnan(max) || std::isinf(max)) { + return max; + } resT min = std::min(in1, in2); return max + std::log1p(std::exp(min - max)); } @@ -78,11 +78,11 @@ template struct LogAddExpFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { - if (std::isnan(in1[i]) || std::isnan(in2[i])) { - res[i] = std::numeric_limits::quiet_NaN(); + resT max = std::max(in1[i], in2[i]); + if (std::isnan(max) || std::isinf(max)) { + res[i] = max; } else { - resT max = std::max(in1[i], in2[i]); res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); } } From 3c874338321f18f5e554074c37ef4d1a30a3539a Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Aug 2023 14:56:23 -0700 Subject: [PATCH 4/5] Broke up 'or' conditional in logaddexp logic for inf and NaN - 'or' conditions can sometimes cause wrong results when using the OS compiler --- .../kernels/elementwise_functions/logaddexp.hpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index b5f48dfffb..91fca7fcb4 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -62,8 +62,13 @@ template struct LogAddExpFunctor resT operator()(const argT1 &in1, const argT2 &in2) { resT max = std::max(in1, in2); - if (std::isnan(max) || std::isinf(max)) { - return max; + if (std::isnan(max)) { + return std::numeric_limits::quiet_NaN(); + } + else { + if (std::isinf(max)) { + return std::numeric_limits::infinity(); + } } resT min = std::min(in1, in2); return max + std::log1p(std::exp(min - max)); @@ -79,8 +84,11 @@ template struct LogAddExpFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { resT max = std::max(in1[i], in2[i]); - if (std::isnan(max) || std::isinf(max)) { - res[i] = max; + if (std::isnan(max)) { + res[i] = std::numeric_limits::quiet_NaN(); + } + else if (std::isinf(max)) { + res[i] = std::numeric_limits::infinity(); } else { res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); From 3c0aeedfd824d6e8a4d665dd70d51e6cc65676d4 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 7 Aug 2023 11:42:10 -0500 Subject: [PATCH 5/5] Modularized logic implementing logaddexp If both arguments are -inf, the result is also -inf. --- .../elementwise_functions/logaddexp.hpp | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index 91fca7fcb4..02375d5313 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -61,17 +61,7 @@ template struct LogAddExpFunctor resT operator()(const argT1 &in1, const argT2 &in2) { - resT max = std::max(in1, in2); - if (std::isnan(max)) { - return std::numeric_limits::quiet_NaN(); - } - else { - if (std::isinf(max)) { - return std::numeric_limits::infinity(); - } - } - resT min = std::min(in1, in2); - return max + std::log1p(std::exp(min - max)); + return impl(in1, in2); } template @@ -83,20 +73,29 @@ template struct LogAddExpFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { - resT max = std::max(in1[i], in2[i]); - if (std::isnan(max)) { - res[i] = std::numeric_limits::quiet_NaN(); - } - else if (std::isinf(max)) { - res[i] = std::numeric_limits::infinity(); - } - else { - res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); - } + res[i] = impl(in1[i], in2[i]); } return res; } + +private: + template T impl(T const &in1, T const &in2) + { + T max = std::max(in1, in2); + if (std::isnan(max)) { + return std::numeric_limits::quiet_NaN(); + } + else { + if (std::isinf(max)) { + // if both args are -inf, and hence max is -inf + // the result is -inf as well + return max; + } + } + T min = std::min(in1, in2); + return max + std::log1p(std::exp(min - max)); + } }; template