From 224455b636fc7fd1603d34471c3e86eaf56a72f9 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 4 Jul 2023 01:55:11 -0700 Subject: [PATCH 1/8] Implements logaddexp and hypot --- dpctl/tensor/__init__.py | 4 + dpctl/tensor/_elementwise_funcs.py | 59 +++- .../kernels/elementwise_functions/hypot.hpp | 215 +++++++++++++ .../elementwise_functions/logaddexp.hpp | 221 +++++++++++++ .../source/elementwise_functions.cpp | 155 ++++++++- dpctl/tests/elementwise/test_hypot.py | 193 ++++++++++++ dpctl/tests/elementwise/test_logaddexp.py | 298 ++++++++++++++++++ 7 files changed, 1140 insertions(+), 5 deletions(-) create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp create mode 100644 dpctl/tests/elementwise/test_hypot.py create mode 100644 dpctl/tests/elementwise/test_logaddexp.py

diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index a0a1a0b4df..fe77645389 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -105,6 +105,7 @@ floor_divide, greater, greater_equal, + hypot, imag, isfinite, isinf, @@ -115,6 +116,7 @@ log1p, log2, log10, + logaddexp, logical_and, logical_not, logical_or, @@ -222,6 +224,7 @@ "floor_divide", "greater", "greater_equal", + "hypot", "imag", "isfinite", "isinf", @@ -241,6 +244,7 @@ "not_equal", "positive", "pow", + "logaddexp", "proj", "real", "sin", diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 06a5080a5e..e15add335c 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -661,7 +661,32 @@ ) # B15: ==== LOGADDEXP (x1, x2) -# FIXME: implement B15 +_logaddexp_docstring_ = """
+logaddexp(x1, x2, out=None, order='K')
+
+Calculates the natural logarithm of the sum of exponentials, `log(exp(x1_i) + exp(x2_i))`, for each element `x1_i` of the input array `x1` with
+the respective element `x2_i` of the input array `x2`.
+
+Args:
+ x1 (usm_ndarray):
+ First input array, expected to have numeric data type.
+ x2 (usm_ndarray):
+ Second input array, also expected to have numeric data type.
+ out ({None, usm_ndarray}, optional):
+ Output array to populate.
+ Array must have the correct shape and the expected data type.
+ order ("C","F","A","K", optional):
+ Memory layout of the newly output array, if parameter `out` is `None`.
+ Default: "K".
+Returns:
+ usm_ndarray:
+ An array containing the result of element-wise `logaddexp`. The data type
+ of the returned array is determined by the Type Promotion Rules.
+"""
+
+logaddexp = BinaryElementwiseFunc(
+ "logaddexp", ti._logaddexp_result_type, ti._logaddexp, _logaddexp_docstring_
+) # B16: ==== LOGICAL_AND (x1, x2) _logical_and_docstring_ = """ @@ -1094,12 +1119,40 @@ order ("C","F","A","K", optional): Memory layout of the newly output array, if parameter `out` is `None`. Default: "K".
+Returns:
+ usm_ndarray:
+ An array containing the element-wise truncated value of the input array.
+ The returned array has the same data type as `x`.
+"""
+trunc = UnaryElementwiseFunc(
+ "trunc", ti._trunc_result_type, ti._trunc, _trunc_docstring
+)
+
+
+# B24: ==== HYPOT (x1, x2)
+_hypot_docstring_ = """
+hypot(x1, x2, out=None, order='K')
+
+Calculates the square root of the sum of squares for each element `x1_i` of the input array `x1` with
+the respective element `x2_i` of the input array `x2`.
+
+Args:
+ x1 (usm_ndarray):
+ First input array, expected to have numeric data type.
+ x2 (usm_ndarray):
+ Second input array, also expected to have numeric data type.
+ out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". Returns: usm_narray: An array containing the element-wise truncated value of input array. The returned array has the same data type as `x`. """ -trunc = UnaryElementwiseFunc( - "trunc", ti._trunc_result_type, ti._trunc, _trunc_docstring +hypot = BinaryElementwiseFunc( + "hypot", ti._hypot_result_type, ti._hypot, _hypot_docstring_ ) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp new file mode 100644 index 0000000000..00e8327559 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp @@ -0,0 +1,215 @@ +//=== HYPOT.hpp - Binary function HYPOT ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of HYPOT(x1, x2) +/// function. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace hypot +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template struct HypotFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + resT operator()(const argT1 &in1, const argT2 &in2) + { + return std::hypot(in1, in2); + } + + template + sycl::vec operator()(const sycl::vec &in1, + const sycl::vec &in2) + { + auto res = sycl::hypot(in1, in2); + if constexpr (std::is_same_v) { + return res; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + res); + } + } +}; + +template +using HypotContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs>; + +template +using HypotStridedFunctor = + elementwise_common::BinaryStridedFunctor>; + +template struct HypotOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class hypot_contig_kernel; + +template +sycl::event hypot_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_impl< + argTy1, argTy2, HypotOutputType, HypotContigFunctor, + hypot_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends); +} + +template struct HypotContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename HypotOutputType::value_type, void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = hypot_contig_impl; + return fn; + } + } +}; + +template struct HypotTypeMapFactory +{ + /*! 
@brief get typeid for output type of std::hypot(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename HypotOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class hypot_strided_strided_kernel; + +template +sycl::event +hypot_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, HypotOutputType, HypotStridedFunctor, + hypot_strided_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template struct HypotStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename HypotOutputType::value_type, void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = hypot_strided_impl; + return fn; + } + } +}; + +} // namespace hypot +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp new file mode 100644 index 0000000000..c39aec1877 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -0,0 +1,221 @@ +//=== logaddexp.hpp - Binary function LOGADDEXP ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of LOGADDEXP(x1, x2) +/// function. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace logaddexp +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +using dpctl::tensor::type_utils::is_complex; +using dpctl::tensor::type_utils::vec_cast; + +template struct LogAddExpFunctor +{ + using supports_sg_loadstore = typename std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = typename std::negation< + std::disjunction, tu_ns::is_complex>>; + + resT operator()(const argT1 &in1, const argT2 &in2) + { + return std::log(std::exp(in1) + std::exp(in2)); + } + + template + sycl::vec operator()(const sycl::vec &in1, + const sycl::vec &in2) + { + auto res = sycl::log(sycl::exp(in1) + sycl::exp(in2)); + if constexpr (std::is_same_v) { + return res; + } + else { + return vec_cast( + res); + } + } +}; + +template +using LogAddExpContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + LogAddExpFunctor, + vec_sz, + n_vecs>; + +template +using LogAddExpStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + LogAddExpFunctor>; + +template struct LogAddExpOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class logaddexp_contig_kernel; + +template +sycl::event logaddexp_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_impl< + argTy1, argTy2, LogAddExpOutputType, LogAddExpContigFunctor, + logaddexp_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends); +} + +template struct LogAddExpContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename LogAddExpOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = logaddexp_contig_impl; + return fn; + } + } +}; + +template struct LogAddExpTypeMapFactory +{ + /*! 
@brief get typeid for output type of std::logaddexp(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename LogAddExpOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class logaddexp_strided_strided_kernel; + +template +sycl::event +logaddexp_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &logaddexpitional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, LogAddExpOutputType, LogAddExpStridedFunctor, + logaddexp_strided_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, logaddexpitional_depends); +} + +template struct LogAddExpStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename LogAddExpOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = logaddexp_strided_impl; + return fn; + } + } +}; + +template +class logaddexp_matrix_row_broadcast_sg_krn; + +} // namespace logaddexp +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 7246236d37..1eab41e3af 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -44,6 +44,7 @@ #include "kernels/elementwise_functions/floor_divide.hpp" #include "kernels/elementwise_functions/greater.hpp" #include "kernels/elementwise_functions/greater_equal.hpp" +#include "kernels/elementwise_functions/hypot.hpp" #include "kernels/elementwise_functions/imag.hpp" #include "kernels/elementwise_functions/isfinite.hpp" #include "kernels/elementwise_functions/isinf.hpp" @@ -54,6 +55,7 @@ #include "kernels/elementwise_functions/log10.hpp" #include "kernels/elementwise_functions/log1p.hpp" #include "kernels/elementwise_functions/log2.hpp" +#include "kernels/elementwise_functions/logaddexp.hpp" #include "kernels/elementwise_functions/logical_and.hpp" #include "kernels/elementwise_functions/logical_not.hpp" #include "kernels/elementwise_functions/logical_or.hpp" @@ -1149,7 +1151,39 @@ void populate_log10_dispatch_vectors(void) // B15: ==== LOGADDEXP (x1, x2) namespace impl { -// FIXME: add code for B15 +namespace logaddexp_fn_ns = dpctl::tensor::kernels::logaddexp; + +static binary_contig_impl_fn_ptr_t + logaddexp_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logaddexp_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logaddexp_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logaddexp_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logaddexp_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogAddExpTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logaddexp_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogAddExpStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logaddexp_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogAddExpContigFactory; + 
DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logaddexp_contig_dispatch_table); +}; } // namespace impl // B16: ==== LOGICAL_AND (x1, x2) @@ -1895,6 +1929,46 @@ void populate_trunc_dispatch_vectors(void) } // namespace impl +// B24: ==== HYPOT (x1, x2) + +namespace impl +{ +namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; + +static binary_contig_impl_fn_ptr_t + hypot_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int hypot_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + hypot_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_hypot_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = hypot_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::HypotTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(hypot_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::HypotStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(hypot_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::HypotContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(hypot_contig_dispatch_table); +}; + +} // namespace impl + // ========================================================================================== // // @@ -2638,7 +2712,45 @@ void init_elementwise_functions(py::module_ m) } // B15: ==== LOGADDEXP (x1, x2) - // FIXME: + { + impl::populate_logaddexp_dispatch_tables(); + using impl::logaddexp_contig_dispatch_table; + using impl::logaddexp_output_id_table; + using impl::logaddexp_strided_dispatch_table; + + auto logaddexp_pyapi = [&](dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst, + sycl::queue exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logaddexp_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logaddexp_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logaddexp_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logaddexp_result_type_pyapi = [&](py::dtype dtype1, + py::dtype dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logaddexp_output_id_table); + }; + m.def("_logaddexp", logaddexp_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logaddexp_result_type", logaddexp_result_type_pyapi, ""); + } // B16: ==== LOGICAL_AND (x1, x2) { @@ -3196,6 +3308,45 @@ void init_elementwise_functions(py::module_ m) }; m.def("_trunc_result_type", trunc_result_type_pyapi); } + + // B24: ==== HYPOT (x) + { + impl::populate_hypot_dispatch_tables(); + using impl::hypot_contig_dispatch_table; + using impl::hypot_output_id_table; + using impl::hypot_strided_dispatch_table; + + auto hypot_pyapi = [&](dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + 
dpctl::tensor::usm_ndarray dst, + sycl::queue exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, hypot_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + hypot_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + hypot_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto hypot_result_type_pyapi = [&](py::dtype dtype1, py::dtype dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + hypot_output_id_table); + }; + m.def("_hypot", hypot_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_hypot_result_type", hypot_result_type_pyapi, ""); + } } } // namespace py_internal diff --git a/dpctl/tests/elementwise/test_hypot.py b/dpctl/tests/elementwise/test_hypot.py new file mode 100644 index 0000000000..a16bcc5d0c --- /dev/null +++ b/dpctl/tests/elementwise/test_hypot.py @@ -0,0 +1,193 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ctypes + +import numpy as np +import pytest + +import dpctl +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _compare_dtypes, _no_complex_dtypes, _usm_types + + +@pytest.mark.parametrize("op1_dtype", _no_complex_dtypes[1:]) +@pytest.mark.parametrize("op2_dtype", _no_complex_dtypes[1:]) +def test_hypot_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.zeros(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.zeros_like(ar1, dtype=op2_dtype, sycl_queue=q) + + r = dpt.hypot(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected = np.hypot( + np.zeros(1, dtype=op1_dtype), np.zeros(1, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar1.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + assert r.sycl_queue == ar1.sycl_queue + + ar3 = dpt.zeros(sz, dtype=op1_dtype, sycl_queue=q) + ar4 = dpt.zeros(2 * sz, dtype=op2_dtype, sycl_queue=q) + + r = dpt.hypot(ar3[::-1], ar4[::2]) + assert isinstance(r, dpt.usm_ndarray) + expected = np.hypot( + np.zeros(1, dtype=op1_dtype), np.zeros(1, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar3.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + + +@pytest.mark.parametrize("op1_usm_type", _usm_types) +@pytest.mark.parametrize("op2_usm_type", _usm_types) +def test_hypot_usm_type_matrix(op1_usm_type, op2_usm_type): + get_queue_or_skip() + + sz = 128 + ar1 = dpt.ones(sz, dtype="i4", usm_type=op1_usm_type) + ar2 = dpt.ones_like(ar1, dtype="i4", usm_type=op2_usm_type) + + r = dpt.hypot(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected_usm_type = dpctl.utils.get_coerced_usm_type( + (op1_usm_type, op2_usm_type) + ) + assert r.usm_type == expected_usm_type + + +def test_hypot_order(): + get_queue_or_skip() + + ar1 = dpt.ones((20, 20), dtype="i4", order="C") + ar2 = dpt.ones((20, 20), dtype="i4", order="C") + r1 = dpt.hypot(ar1, ar2, order="C") + assert r1.flags.c_contiguous + r2 = dpt.hypot(ar1, ar2, order="F") + assert r2.flags.f_contiguous + r3 = dpt.hypot(ar1, ar2, order="A") + assert r3.flags.c_contiguous + r4 = dpt.hypot(ar1, ar2, order="K") + assert r4.flags.c_contiguous + + ar1 = dpt.ones((20, 20), dtype="i4", order="F") + ar2 = dpt.ones((20, 20), dtype="i4", order="F") + r1 = dpt.hypot(ar1, ar2, order="C") + assert r1.flags.c_contiguous + r2 = dpt.hypot(ar1, ar2, order="F") + assert r2.flags.f_contiguous + r3 = dpt.hypot(ar1, ar2, order="A") + assert r3.flags.f_contiguous + r4 = dpt.hypot(ar1, ar2, order="K") + assert r4.flags.f_contiguous + + ar1 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2] + ar2 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2] + r4 = dpt.hypot(ar1, ar2, order="K") + assert r4.strides == (20, -1) + + ar1 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT + ar2 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT + r4 = dpt.hypot(ar1, ar2, order="K") + assert r4.strides == (-1, 20) + + +def test_hypot_broadcasting(): + get_queue_or_skip() + + m = dpt.ones((100, 5), dtype="i4") + v = dpt.arange(1, 6, dtype="i4") + + r = dpt.hypot(m, v) + + expected = np.hypot( + np.ones((100, 5), dtype="i4"), np.arange(1, 6, dtype="i4") + ) + tol = 8 * np.finfo(r.dtype).resolution + assert np.allclose( + dpt.asnumpy(r), expected.astype(r.dtype), atol=tol, rtol=tol + ) 
+ + r2 = dpt.hypot(v, m) + expected2 = np.hypot( + np.arange(1, 6, dtype="i4"), np.ones((100, 5), dtype="i4") + ) + assert np.allclose( + dpt.asnumpy(r2), expected2.astype(r2.dtype), atol=tol, rtol=tol + ) + + +@pytest.mark.parametrize("arr_dt", _no_complex_dtypes[1:]) +def test_hypot_python_scalar(arr_dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(arr_dt, q) + + X = dpt.ones((10, 10), dtype=arr_dt, sycl_queue=q) + py_ones = ( + bool(1), + int(1), + float(1), + np.float32(1), + ctypes.c_int(1), + ) + for sc in py_ones: + R = dpt.hypot(X, sc) + assert isinstance(R, dpt.usm_ndarray) + R = dpt.hypot(sc, X) + assert isinstance(R, dpt.usm_ndarray) + + +class MockArray: + def __init__(self, arr): + self.data_ = arr + + @property + def __sycl_usm_array_interface__(self): + return self.data_.__sycl_usm_array_interface__ + + +def test_hypot_mock_array(): + get_queue_or_skip() + a = dpt.arange(10) + b = dpt.ones(10) + c = MockArray(b) + r = dpt.hypot(a, c) + assert isinstance(r, dpt.usm_ndarray) + + +def test_hypot_canary_mock_array(): + get_queue_or_skip() + a = dpt.arange(10) + + class Canary: + def __init__(self): + pass + + @property + def __sycl_usm_array_interface__(self): + return None + + c = Canary() + with pytest.raises(ValueError): + dpt.hypot(a, c) diff --git a/dpctl/tests/elementwise/test_logaddexp.py b/dpctl/tests/elementwise/test_logaddexp.py new file mode 100644 index 0000000000..bf334eca9d --- /dev/null +++ b/dpctl/tests/elementwise/test_logaddexp.py @@ -0,0 +1,298 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ctypes + +import numpy as np +import pytest +from numpy.testing import assert_raises_regex + +import dpctl +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported +from dpctl.utils import ExecutionPlacementError + +from .utils import _compare_dtypes, _no_complex_dtypes, _usm_types + + +@pytest.mark.parametrize("op1_dtype", _no_complex_dtypes) +@pytest.mark.parametrize("op2_dtype", _no_complex_dtypes) +def test_logaddexp_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype) + + r = dpt.logaddexp(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected = np.logaddexp(dpt.asnumpy(ar1), dpt.asnumpy(ar2)) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar1.shape + tol = 8 * np.finfo(r.dtype).resolution + assert np.allclose( + dpt.asnumpy(r), expected.astype(r.dtype), atol=tol, rtol=tol + ) + assert r.sycl_queue == ar1.sycl_queue + + ar3 = dpt.ones(sz, dtype=op1_dtype) + ar4 = dpt.ones(2 * sz, dtype=op2_dtype) + + r = dpt.logaddexp(ar3[::-1], ar4[::2]) + assert isinstance(r, dpt.usm_ndarray) + expected = np.logaddexp(dpt.asnumpy(ar3)[::-1], dpt.asnumpy(ar4)[::2]) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar3.shape + assert np.allclose( + dpt.asnumpy(r), expected.astype(r.dtype), atol=tol, rtol=tol + ) + + +@pytest.mark.parametrize("op1_usm_type", _usm_types) +@pytest.mark.parametrize("op2_usm_type", _usm_types) +def test_logaddexp_usm_type_matrix(op1_usm_type, op2_usm_type): + get_queue_or_skip() + + sz = 128 + ar1 = dpt.ones(sz, dtype="i4", usm_type=op1_usm_type) + ar2 = dpt.ones_like(ar1, dtype="i4", usm_type=op2_usm_type) + + r = dpt.logaddexp(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected_usm_type = dpctl.utils.get_coerced_usm_type( + (op1_usm_type, op2_usm_type) + ) + assert r.usm_type == expected_usm_type + + +def test_logaddexp_order(): + get_queue_or_skip() + + test_shape = ( + 20, + 20, + ) + test_shape2 = tuple(2 * dim for dim in test_shape) + n = test_shape[-1] + + for dt1, dt2 in zip(["i4", "i4", "f4"], ["i4", "f4", "i4"]): + ar1 = dpt.ones(test_shape, dtype=dt1, order="C") + ar2 = dpt.ones(test_shape, dtype=dt2, order="C") + r1 = dpt.logaddexp(ar1, ar2, order="C") + assert r1.flags.c_contiguous + r2 = dpt.logaddexp(ar1, ar2, order="F") + assert r2.flags.f_contiguous + r3 = dpt.logaddexp(ar1, ar2, order="A") + assert r3.flags.c_contiguous + r4 = dpt.logaddexp(ar1, ar2, order="K") + assert r4.flags.c_contiguous + + ar1 = dpt.ones(test_shape, dtype=dt1, order="F") + ar2 = dpt.ones(test_shape, dtype=dt2, order="F") + r1 = dpt.logaddexp(ar1, ar2, order="C") + assert r1.flags.c_contiguous + r2 = dpt.logaddexp(ar1, ar2, order="F") + assert r2.flags.f_contiguous + r3 = dpt.logaddexp(ar1, ar2, order="A") + assert r3.flags.f_contiguous + r4 = dpt.logaddexp(ar1, ar2, order="K") + assert r4.flags.f_contiguous + + ar1 = dpt.ones(test_shape2, dtype=dt1, order="C")[:20, ::-2] + ar2 = dpt.ones(test_shape2, dtype=dt2, order="C")[:20, ::-2] + r4 = dpt.logaddexp(ar1, ar2, order="K") + assert r4.strides == (n, -1) + r5 = dpt.logaddexp(ar1, ar2, order="C") + assert r5.strides == (n, 1) + + ar1 = dpt.ones(test_shape2, dtype=dt1, order="C")[:20, ::-2].mT + ar2 = dpt.ones(test_shape2, dtype=dt2, order="C")[:20, ::-2].mT + r4 = dpt.logaddexp(ar1, ar2, order="K") + 
assert r4.strides == (-1, n) + r5 = dpt.logaddexp(ar1, ar2, order="C") + assert r5.strides == (n, 1) + + +def test_logaddexp_broadcasting(): + get_queue_or_skip() + + m = dpt.ones((100, 5), dtype="i4") + v = dpt.arange(1, 6, dtype="i4") + + r = dpt.logaddexp(m, v) + + expected = np.logaddexp( + np.ones((100, 5), dtype="i4"), np.arange(1, 6, dtype="i4") + ) + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + + r2 = dpt.logaddexp(v, m) + expected2 = np.logaddexp( + np.arange(1, 6, dtype="i4"), np.ones((100, 5), dtype="i4") + ) + assert (dpt.asnumpy(r2) == expected2.astype(r2.dtype)).all() + + +def test_logaddexp_broadcasting_error(): + get_queue_or_skip() + m = dpt.ones((10, 10), dtype="i4") + v = dpt.ones((3,), dtype="i4") + with pytest.raises(ValueError): + dpt.logaddexp(m, v) + + +@pytest.mark.parametrize("arr_dt", _no_complex_dtypes) +def test_logaddexp_python_scalar(arr_dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(arr_dt, q) + + X = dpt.zeros((10, 10), dtype=arr_dt, sycl_queue=q) + py_zeros = ( + bool(0), + int(0), + float(0), + complex(0), + np.float32(0), + ctypes.c_int(0), + ) + for sc in py_zeros: + R = dpt.logaddexp(X, sc) + assert isinstance(R, dpt.usm_ndarray) + R = dpt.logaddexp(sc, X) + assert isinstance(R, dpt.usm_ndarray) + + +class MockArray: + def __init__(self, arr): + self.data_ = arr + + @property + def __sycl_usm_array_interface__(self): + return self.data_.__sycl_usm_array_interface__ + + +def test_logaddexp_mock_array(): + get_queue_or_skip() + a = dpt.arange(10) + b = dpt.ones(10) + c = MockArray(b) + r = dpt.logaddexp(a, c) + assert isinstance(r, dpt.usm_ndarray) + + +def test_logaddexp_canary_mock_array(): + get_queue_or_skip() + a = dpt.arange(10) + + class Canary: + def __init__(self): + pass + + @property + def __sycl_usm_array_interface__(self): + return None + + c = Canary() + with pytest.raises(ValueError): + dpt.logaddexp(a, c) + + +def test_logaddexp_errors(): + get_queue_or_skip() + try: + gpu_queue = dpctl.SyclQueue("gpu") + except dpctl.SyclQueueCreationError: + pytest.skip("SyclQueue('gpu') failed, skipping") + try: + cpu_queue = dpctl.SyclQueue("cpu") + except dpctl.SyclQueueCreationError: + pytest.skip("SyclQueue('cpu') failed, skipping") + + ar1 = dpt.ones(2, dtype="float32", sycl_queue=gpu_queue) + ar2 = dpt.ones_like(ar1, sycl_queue=gpu_queue) + y = dpt.empty_like(ar1, sycl_queue=cpu_queue) + assert_raises_regex( + TypeError, + "Input and output allocation queues are not compatible", + dpt.logaddexp, + ar1, + ar2, + y, + ) + + ar1 = dpt.ones(2, dtype="float32") + ar2 = dpt.ones_like(ar1, dtype="int32") + y = dpt.empty(3) + assert_raises_regex( + TypeError, + "The shape of input and output arrays are inconsistent", + dpt.logaddexp, + ar1, + ar2, + y, + ) + + ar1 = dpt.ones(2, dtype="float32") + ar2 = dpt.ones_like(ar1, dtype="int32") + y = ar1 + assert_raises_regex( + TypeError, + "Input and output arrays have memory overlap", + dpt.logaddexp, + ar1, + ar2, + y, + ) + + ar1 = np.ones(2, dtype="float32") + ar2 = np.ones_like(ar1, dtype="int32") + assert_raises_regex( + ExecutionPlacementError, + "Execution placement can not be unambiguously inferred.*", + dpt.logaddexp, + ar1, + ar2, + ) + + ar1 = dpt.ones(2, dtype="float32") + ar2 = dpt.ones_like(ar1, dtype="int32") + y = np.empty_like(ar1) + assert_raises_regex( + TypeError, + "output array must be of usm_ndarray type", + dpt.logaddexp, + ar1, + ar2, + y, + ) + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes) +def test_logaddexp_dtype_error( + dtype, +): + q = 
get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + ar1 = dpt.ones(5, dtype=dtype) + ar2 = dpt.ones_like(ar1, dtype="f4") + + y = dpt.zeros_like(ar1, dtype="int8") + assert_raises_regex( + TypeError, "Output array of type.*is needed", dpt.logaddexp, ar1, ar2, y + ) From a173b9cd8415b1e2d5adc42f52703fc546687dff Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 23 Jul 2023 13:19:50 -0500 Subject: [PATCH 2/8] BinaryElementwiseFunc get acceptance_fn parameter, set individually for tensor.divide The _find_buf_dtype2 must stay consistent with _find_buf_dtype in that for binary_fn that can be expressed via unary_fn(other_binary_fn(unary_fn(in1), unary_fn(in2))), e.g. logaddexp(x1, x2) == log(exp(x1) + exp(x2)) the promotion of integral types to fp type must be consistence with both evaluation. The special-casing, necessary for proper working of dpt.divide, where dpt.divide(dpt.asarray(1, dtype="i1"), dpt.asarray(1, dtype="u1")) should give default fp type resulted in logaddexp(dpt.asarray(1, dtype="i1"), dpt.asarray(1, dtype="u1")) returning array of different data-type than if evaluated alternatively, since dpt.exp(dpt.asarray(1, dtype="i1")) and dpt.exp(dpt.asarray(1, dtype="u1")) both gave float16 arrays, and the result was of float16 data type. Now, both behaviors are fixed: ``` In [5]: dpt.log(dpt.add(dpt.exp(dpt.asarray(1, dtype="u1")), dpt.exp(dpt.asarray(1, dtype="i1")))) Out[5]: usm_ndarray(1.693, dtype=float16) In [6]: dpt.logaddexp(dpt.asarray(1, dtype="u1"), dpt.asarray(1, dtype="i1")) Out[6]: usm_ndarray(1.693, dtype=float16) In [7]: dpt.divide(dpt.asarray(1, dtype="u1"), dpt.asarray(1, dtype="i1")) Out[7]: usm_ndarray(1., dtype=float32) ``` --- dpctl/tensor/_elementwise_common.py | 22 ++++++++++-- dpctl/tensor/_elementwise_funcs.py | 7 +++- dpctl/tensor/_type_utils.py | 56 +++++++++++++++++++++-------- 3 files changed, 66 insertions(+), 19 deletions(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 221c482969..409f97e5aa 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -27,6 +27,7 @@ from dpctl.utils import ExecutionPlacementError from ._type_utils import ( + _acceptance_fn_default, _empty_like_orderK, _empty_like_pair_orderK, _find_buf_dtype, @@ -48,6 +49,12 @@ def __init__(self, name, result_type_resolver_fn, unary_dp_impl_fn, docs): self.unary_fn_ = unary_dp_impl_fn self.__doc__ = docs + def __str__(self): + return f"<{self.__name__} '{self.name_}'>" + + def __repr__(self): + return f"<{self.__name__} '{self.name_}'>" + def __call__(self, x, out=None, order="K"): if not isinstance(x, dpt.usm_ndarray): raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}") @@ -357,6 +364,7 @@ def __init__( binary_dp_impl_fn, docs, binary_inplace_fn=None, + acceptance_fn=None, ): self.__name__ = "BinaryElementwiseFunc" self.name_ = name @@ -364,12 +372,16 @@ def __init__( self.binary_fn_ = binary_dp_impl_fn self.binary_inplace_fn_ = binary_inplace_fn self.__doc__ = docs + if callable(acceptance_fn): + self.acceptance_fn_ = acceptance_fn + else: + self.acceptance_fn_ = _acceptance_fn_default def __str__(self): - return f"" + return f"<{self.__name__} '{self.name_}'>" def __repr__(self): - return f"" + return f"<{self.__name__} '{self.name_}'>" def __call__(self, o1, o2, out=None, order="K"): # FIXME: replace with check against base array @@ -445,7 +457,11 @@ def __call__(self, o1, o2, out=None, order="K"): o1_dtype, o2_dtype = _resolve_weak_types(o1_dtype, o2_dtype, sycl_dev) 
buf1_dt, buf2_dt, res_dt = _find_buf_dtype2( - o1_dtype, o2_dtype, self.result_type_resolver_fn_, sycl_dev + o1_dtype, + o2_dtype, + self.result_type_resolver_fn_, + sycl_dev, + acceptance_fn=self.acceptance_fn_, ) if res_dt is None: diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index e15add335c..d843041431 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -17,6 +17,7 @@ import dpctl.tensor._tensor_impl as ti from ._elementwise_common import BinaryElementwiseFunc, UnaryElementwiseFunc +from ._type_utils import _acceptance_fn_divide # U01: ==== ABS (x) _abs_docstring_ = """ @@ -215,7 +216,11 @@ """ divide = BinaryElementwiseFunc( - "divide", ti._divide_result_type, ti._divide, _divide_docstring_ + "divide", + ti._divide_result_type, + ti._divide, + _divide_docstring_, + acceptance_fn=_acceptance_fn_divide, ) # B09: ==== EQUAL (x1, x2) diff --git a/dpctl/tensor/_type_utils.py b/dpctl/tensor/_type_utils.py index 3ba6c316fb..fb2223f292 100644 --- a/dpctl/tensor/_type_utils.py +++ b/dpctl/tensor/_type_utils.py @@ -255,7 +255,34 @@ def _get_device_default_dtype(dt_kind, sycl_dev): raise RuntimeError -def _find_buf_dtype2(arg1_dtype, arg2_dtype, query_fn, sycl_dev): +def _acceptance_fn_default( + arg1_dtype, arg2_dtype, ret_buf1_dt, ret_buf2_dt, res_dt, sycl_dev +): + return True + + +def _acceptance_fn_divide( + arg1_dtype, arg2_dtype, ret_buf1_dt, ret_buf2_dt, res_dt, sycl_dev +): + # both are being promoted, if the kind of result is + # different than the kind of original input dtypes, + # we use default dtype for the resulting kind. + # This covers, e.g. (array_dtype_i1 / array_dtype_u1) + # result of which in divide is double (in NumPy), but + # regular type promotion rules peg at float16 + if (ret_buf1_dt.kind != arg1_dtype.kind) and ( + ret_buf2_dt.kind != arg2_dtype.kind + ): + default_dt = _get_device_default_dtype(res_dt.kind, sycl_dev) + if res_dt == default_dt: + return True + else: + return False + else: + return True + + +def _find_buf_dtype2(arg1_dtype, arg2_dtype, query_fn, sycl_dev, acceptance_fn): res_dt = query_fn(arg1_dtype, arg2_dtype) if res_dt: return None, None, res_dt @@ -275,21 +302,18 @@ def _find_buf_dtype2(arg1_dtype, arg2_dtype, query_fn, sycl_dev): if ret_buf1_dt is None or ret_buf2_dt is None: return ret_buf1_dt, ret_buf2_dt, res_dt else: - # both are being promoted, if the kind of result is - # different than the kind of original input dtypes, - # we must use default dtype for the resulting kind. 
- if (res_dt.kind != arg1_dtype.kind) and ( - res_dt.kind != arg2_dtype.kind - ): - default_dt = _get_device_default_dtype( - res_dt.kind, sycl_dev - ) - if res_dt == default_dt: - return ret_buf1_dt, ret_buf2_dt, res_dt - else: - continue - else: + acceptable = acceptance_fn( + arg1_dtype, + arg2_dtype, + ret_buf1_dt, + ret_buf2_dt, + res_dt, + sycl_dev, + ) + if acceptable: return ret_buf1_dt, ret_buf2_dt, res_dt + else: + continue return None, None, None @@ -318,4 +342,6 @@ def _find_inplace_dtype(lhs_dtype, rhs_dtype, query_fn, sycl_dev): "_empty_like_orderK", "_empty_like_pair_orderK", "_to_device_supported_dtype", + "_acceptance_fn_default", + "_acceptance_fn_divide", ] From 626dd4a972373ddaf5012e2283746044fa6087df Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 23 Jul 2023 13:28:43 -0500 Subject: [PATCH 3/8] Only try calling _inplace if inplace function is defined --- dpctl/tensor/_elementwise_common.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 409f97e5aa..78c79fb2ad 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -386,10 +386,11 @@ def __repr__(self): def __call__(self, o1, o2, out=None, order="K"): # FIXME: replace with check against base array # when views can be identified - if o1 is out: - return self._inplace(o1, o2) - elif o2 is out: - return self._inplace(o2, o1) + if self.binary_inplace_fn_: + if o1 is out: + return self._inplace(o1, o2) + elif o2 is out: + return self._inplace(o2, o1) if order not in ["K", "C", "F", "A"]: order = "K" From 22081fb423fe16abd4eca8412dcc3835e0ba5ec3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 23 Jul 2023 13:29:15 -0500 Subject: [PATCH 4/8] Since logaddexp is not defined for complex types, do not try complex Python scalar --- dpctl/tests/elementwise/test_logaddexp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dpctl/tests/elementwise/test_logaddexp.py b/dpctl/tests/elementwise/test_logaddexp.py index bf334eca9d..355f0d0dc7 100644 --- a/dpctl/tests/elementwise/test_logaddexp.py +++ b/dpctl/tests/elementwise/test_logaddexp.py @@ -166,7 +166,6 @@ def test_logaddexp_python_scalar(arr_dt): bool(0), int(0), float(0), - complex(0), np.float32(0), ctypes.c_int(0), ) From d8437e649ca296beb24d6eee673803a82824d067 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 23 Jul 2023 13:30:06 -0500 Subject: [PATCH 5/8] Implement logaddexp in numerically stable way --- .../elementwise_functions/logaddexp.hpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index c39aec1877..f785e566de 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -62,22 +62,25 @@ template struct LogAddExpFunctor resT operator()(const argT1 &in1, const argT2 &in2) { - return std::log(std::exp(in1) + std::exp(in2)); + resT max = std::max(in1, in2); + resT min = std::min(in1, in2); + return max + std::log(resT(1) + std::exp(min - max)); } template sycl::vec operator()(const sycl::vec &in1, const sycl::vec &in2) { - auto res = sycl::log(sycl::exp(in1) + sycl::exp(in2)); - if constexpr (std::is_same_v) { - return res; - } - else { - return vec_cast( - res); + sycl::vec res; + auto diff = in1 - in2; 
+ +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + resT max = std::max(in1[i], in2[i]); + res[i] = max + std::log(resT(1) + std::exp(std::abs(diff[i]))); } + + return res; } }; From c43caee67ba748e7b731f26991162821f2336d43 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 23 Jul 2023 18:42:14 -0500 Subject: [PATCH 6/8] Added missing argument --- dpctl/tests/elementwise/test_type_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dpctl/tests/elementwise/test_type_utils.py b/dpctl/tests/elementwise/test_type_utils.py index 4c2fd3cb60..403e455c2e 100644 --- a/dpctl/tests/elementwise/test_type_utils.py +++ b/dpctl/tests/elementwise/test_type_utils.py @@ -155,7 +155,9 @@ def _denier_fn(dt1, dt2): dev = MockDevice(fp16, fp64) arg1_dt = dpt.float64 arg2_dt = dpt.complex64 - r = tu._find_buf_dtype2(arg1_dt, arg2_dt, _denier_fn, dev) + r = tu._find_buf_dtype2( + arg1_dt, arg2_dt, _denier_fn, dev, tu._acceptance_fn_default + ) assert r == ( None, None, From 3788569d3d8c2e19bcbd54305ad1fcc360579d2b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 24 Jul 2023 06:20:33 -0500 Subject: [PATCH 7/8] Fixed computing allclose testing tolerance in test_dtype_matrix test When NumPy's result has coarser dtype than that of DPCTL we must use the largest resolution from each dtype. --- dpctl/tests/elementwise/test_logaddexp.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dpctl/tests/elementwise/test_logaddexp.py b/dpctl/tests/elementwise/test_logaddexp.py index 355f0d0dc7..a693389354 100644 --- a/dpctl/tests/elementwise/test_logaddexp.py +++ b/dpctl/tests/elementwise/test_logaddexp.py @@ -18,7 +18,7 @@ import numpy as np import pytest -from numpy.testing import assert_raises_regex +from numpy.testing import assert_allclose, assert_raises_regex import dpctl import dpctl.tensor as dpt @@ -44,8 +44,10 @@ def test_logaddexp_dtype_matrix(op1_dtype, op2_dtype): expected = np.logaddexp(dpt.asnumpy(ar1), dpt.asnumpy(ar2)) assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) assert r.shape == ar1.shape - tol = 8 * np.finfo(r.dtype).resolution - assert np.allclose( + tol = 8 * max( + np.finfo(r.dtype).resolution, np.finfo(expected.dtype).resolution + ) + assert_allclose( dpt.asnumpy(r), expected.astype(r.dtype), atol=tol, rtol=tol ) assert r.sycl_queue == ar1.sycl_queue @@ -58,7 +60,7 @@ def test_logaddexp_dtype_matrix(op1_dtype, op2_dtype): expected = np.logaddexp(dpt.asnumpy(ar3)[::-1], dpt.asnumpy(ar4)[::2]) assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) assert r.shape == ar3.shape - assert np.allclose( + assert_allclose( dpt.asnumpy(r), expected.astype(r.dtype), atol=tol, rtol=tol ) From 67c8195f9d556e3948b2a308e29a66de3d3d7f40 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 24 Jul 2023 11:04:48 -0700 Subject: [PATCH 8/8] Calls of the type log(1 + X) changed to log1p(X) in logaddexp --- .../include/kernels/elementwise_functions/logaddexp.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index f785e566de..c8f549205e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -64,7 +64,7 @@ template struct LogAddExpFunctor { resT max = std::max(in1, in2); resT min = std::min(in1, in2); - return max + std::log(resT(1) + 
std::exp(min - max)); + return max + std::log1p(std::exp(min - max)); } template @@ -77,7 +77,7 @@ template struct LogAddExpFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { resT max = std::max(in1[i], in2[i]); - res[i] = max + std::log(resT(1) + std::exp(std::abs(diff[i]))); + res[i] = max + std::log1p(std::exp(std::abs(diff[i]))); } return res;